PREHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by filesink create table tmptable(key string, value int) PREHOOK: type: CREATETABLE POSTHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by filesink create table tmptable(key string, value int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: explain insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst3' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-7, Stage-8 Stage-6 depends on stages: Stage-2 , consists of Stage-5, Stage-4 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4 Stage-3 depends on stages: Stage-0 Stage-4 Stage-7 is a root stage Stage-8 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 TableScan alias: s1 Select Operator Group By Operator aggregations: expr: count(1) bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: 'tst1' type: string expr: _col0 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: file:/tmp/njain/hive_2011-12-07_18-02-38_989_3821093086240595817/-mr-10002 Union Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable file:/tmp/njain/hive_2011-12-07_18-02-38_989_3821093086240595817/-mr-10004 Union Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable file:/tmp/njain/hive_2011-12-07_18-02-38_989_3821093086240595817/-mr-10005 Union Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable Stage: Stage-6 Conditional Operator Stage: Stage-5 Move Operator files: hdfs directory: true destination: pfile:/data/users/njain/hive_commit2/build/ql/scratchdir/hive_2011-12-07_18-02-38_989_3821093086240595817/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable Stage: Stage-3 Stats-Aggr Operator Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: pfile:/data/users/njain/hive_commit2/build/ql/scratchdir/hive_2011-12-07_18-02-38_989_3821093086240595817/-ext-10003 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 TableScan alias: s2 Select Operator Group By Operator aggregations: expr: count(1) bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: 'tst2' type: string expr: _col0 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-8 Map Reduce Alias -> Map Operator Tree: null-subquery2:unionsrc-subquery2:s3 TableScan alias: s3 Select Operator Group By Operator aggregations: expr: count(1) bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: 'tst3' type: string expr: _col0 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@tmptable POSTHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.key EXPRESSION [] POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, (src)s3.null, ] PREHOOK: query: select * from tmptable x sort by x.key PREHOOK: type: QUERY PREHOOK: Input: default@tmptable PREHOOK: Output: file:/tmp/njain/hive_2011-12-07_18-02-53_260_7981936505385901161/-mr-10000 POSTHOOK: query: select * from tmptable x sort by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tmptable POSTHOOK: Output: file:/tmp/njain/hive_2011-12-07_18-02-53_260_7981936505385901161/-mr-10000 POSTHOOK: Lineage: tmptable.key EXPRESSION [] POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, (src)s3.null, ] tst1 500 tst2 500 tst3 500