PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), avg(substr(src.value,5)), avg(DISTINCT substr(src.value,5)), max(substr(src.value,5)), min(substr(src.value,5)), std(substr(src.value,5)), stddev_samp(substr(src.value,5)), variance(substr(src.value,5)), var_samp(substr(src.value,5)) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), avg(substr(src.value,5)), avg(DISTINCT substr(src.value,5)), max(substr(src.value,5)), min(substr(src.value,5)), std(substr(src.value,5)), stddev_samp(substr(src.value,5)), variance(substr(src.value,5)), var_samp(substr(src.value,5)) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: src TableScan alias: src Select Operator expressions: expr: value type: string outputColumnNames: value Group By Operator aggregations: expr: sum(substr(value, 5)) expr: avg(substr(value, 5)) expr: avg(DISTINCT substr(value, 5)) expr: max(substr(value, 5)) expr: min(substr(value, 5)) expr: std(substr(value, 5)) expr: stddev_samp(substr(value, 5)) expr: variance(substr(value, 5)) expr: var_samp(substr(value, 5)) bucketGroup: false keys: expr: substr(value, 5) type: string mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: -1 value expressions: expr: _col1 type: double expr: _col2 type: struct expr: _col3 type: struct expr: _col4 type: string expr: _col5 type: string expr: _col6 type: struct expr: _col7 type: struct expr: _col8 type: struct expr: _col9 type: struct Reduce Operator Tree: Group By Operator aggregations: expr: sum(VALUE._col0) expr: avg(VALUE._col1) expr: avg(DISTINCT KEY._col0:0._col0) expr: max(VALUE._col3) expr: min(VALUE._col4) expr: std(VALUE._col5) expr: stddev_samp(VALUE._col6) expr: variance(VALUE._col7) expr: var_samp(VALUE._col8) bucketGroup: false mode: partials outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: file:/tmp/sdong/hive_2011-02-10_01-50-36_160_9079459775422143866/-mr-10002 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: double expr: _col1 type: struct expr: _col2 type: struct expr: _col3 type: string expr: _col4 type: string expr: _col5 type: struct expr: _col6 type: struct expr: _col7 type: struct expr: _col8 type: struct Reduce Operator Tree: Group By Operator aggregations: expr: sum(VALUE._col0) expr: avg(VALUE._col1) expr: avg(VALUE._col2) expr: max(VALUE._col3) expr: min(VALUE._col4) expr: std(VALUE._col5) expr: stddev_samp(VALUE._col6) expr: variance(VALUE._col7) expr: var_samp(VALUE._col8) bucketGroup: false mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Operator expressions: expr: _col0 type: double expr: _col1 type: double expr: _col2 type: double expr: _col3 type: string expr: _col4 type: string expr: _col5 type: double expr: _col6 type: double expr: _col7 type: double expr: _col8 type: double outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Operator expressions: expr: _col0 type: double expr: _col1 type: double expr: _col2 type: double expr: UDFToDouble(_col3) type: double expr: UDFToDouble(_col4) type: double expr: _col5 type: double expr: _col6 type: double expr: _col7 type: double expr: _col8 type: double outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), avg(substr(src.value,5)), avg(DISTINCT substr(src.value,5)), max(substr(src.value,5)), min(substr(src.value,5)), std(substr(src.value,5)), stddev_samp(substr(src.value,5)), variance(substr(src.value,5)), var_samp(substr(src.value,5)) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), avg(substr(src.value,5)), avg(DISTINCT substr(src.value,5)), max(substr(src.value,5)), min(substr(src.value,5)), std(substr(src.value,5)), stddev_samp(substr(src.value,5)), variance(substr(src.value,5)), var_samp(substr(src.value,5)) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-50-44_349_2755110813259006506/-mr-10000 POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-50-44_349_2755110813259006506/-mr-10000 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876 20469.01089779559