PREHOOK: query: -- SORT_QUERY_RESULTS CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_g2 POSTHOOK: query: -- SORT_QUERY_RESULTS CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_g2 PREHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_g3 POSTHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_g3 PREHOOK: query: CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_g4 POSTHOOK: query: CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_g4 PREHOOK: query: CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_h2 POSTHOOK: query: CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_h2 PREHOOK: query: CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_h3 POSTHOOK: query: CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_h3 PREHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 Stage: Stage-5 Stats-Aggr Operator Stage: Stage-2 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 Stage: Stage-6 Stats-Aggr Operator PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_g2 PREHOOK: Output: default@dest_g3 PREHOOK: Output: default@dest_g4 POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_g2 POSTHOOK: Output: default@dest_g3 POSTHOOK: Output: default@dest_g4 POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_g3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_g4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: SELECT * FROM dest_g2 PREHOOK: type: QUERY PREHOOK: Input: default@dest_g2 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_g2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g2 #### A masked pattern was here #### 5 6 5397.0 278 10 6 5 6398.0 331 6 7 6 7735.0 447 10 8 8 8762.0 595 10 9 7 91047.0 577 12 PREHOOK: query: SELECT * FROM dest_g3 PREHOOK: type: QUERY PREHOOK: Input: default@dest_g3 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_g3 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g3 #### A masked pattern was here #### 0 1 00.0 0 3 1 71 116414.0 10044 115 2 69 225571.0 15780 111 3 62 332004.0 20119 99 4 74 452763.0 30965 124 PREHOOK: query: SELECT * FROM dest_g4 PREHOOK: type: QUERY PREHOOK: Input: default@dest_g4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_g4 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g4 #### A masked pattern was here #### 0 1 00.0 0 3 1 71 116414.0 10044 115 2 69 225571.0 15780 111 3 62 332004.0 20119 99 4 74 452763.0 30965 124 5 6 5397.0 278 10 6 5 6398.0 331 6 7 6 7735.0 447 10 8 8 8762.0 595 10 9 7 91047.0 577 12 PREHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 Stage-8 depends on stages: Stage-2 Stage-9 depends on stages: Stage-5 Stage-10 depends on stages: Stage-9 Stage-3 depends on stages: Stage-10 Stage-11 depends on stages: Stage-3 Stage-4 depends on stages: Stage-10 Stage-12 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-5 Map Reduce Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 Stage: Stage-6 Stats-Aggr Operator Stage: Stage-1 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 Stage: Stage-7 Stats-Aggr Operator Stage: Stage-2 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 Stage: Stage-8 Stats-Aggr Operator Stage: Stage-9 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string), substr(value, 5) (type: string) sort order: +++ Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 Stage: Stage-10 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 Stage: Stage-3 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 Stage: Stage-11 Stats-Aggr Operator Stage: Stage-4 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 Stage: Stage-12 Stats-Aggr Operator PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_g2 PREHOOK: Output: default@dest_g3 PREHOOK: Output: default@dest_g4 PREHOOK: Output: default@dest_h2 PREHOOK: Output: default@dest_h3 POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_g2 POSTHOOK: Output: default@dest_g3 POSTHOOK: Output: default@dest_g4 POSTHOOK: Output: default@dest_h2 POSTHOOK: Output: default@dest_h3 POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_g3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_g4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: SELECT * FROM dest_g2 PREHOOK: type: QUERY PREHOOK: Input: default@dest_g2 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_g2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g2 #### A masked pattern was here #### 5 6 5397.0 278 10 6 5 6398.0 331 6 7 6 7735.0 447 10 8 8 8762.0 595 10 9 7 91047.0 577 12 PREHOOK: query: SELECT * FROM dest_g3 PREHOOK: type: QUERY PREHOOK: Input: default@dest_g3 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_g3 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g3 #### A masked pattern was here #### 0 1 00.0 0 3 1 71 116414.0 10044 115 2 69 225571.0 15780 111 3 62 332004.0 20119 99 4 74 452763.0 30965 124 PREHOOK: query: SELECT * FROM dest_g4 PREHOOK: type: QUERY PREHOOK: Input: default@dest_g4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_g4 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g4 #### A masked pattern was here #### 0 1 00.0 0 3 1 71 116414.0 10044 115 2 69 225571.0 15780 111 3 62 332004.0 20119 99 4 74 452763.0 30965 124 5 6 5397.0 278 10 6 5 6398.0 331 6 7 6 7735.0 447 10 8 8 8762.0 595 10 9 7 91047.0 577 12 PREHOOK: query: SELECT * FROM dest_h2 PREHOOK: type: QUERY PREHOOK: Input: default@dest_h2 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_h2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_h2 #### A masked pattern was here #### 0 1 00.0 0 3 1 4 1878.0 878 6 1 5 1729.0 729 8 1 6 11282.0 1282 12 1 6 11494.0 1494 11 1 7 11171.0 1171 11 1 7 11516.0 1516 10 1 8 11263.0 1263 10 1 9 12294.0 2294 14 1 9 12654.0 2654 16 PREHOOK: query: SELECT * FROM dest_h3 PREHOOK: type: QUERY PREHOOK: Input: default@dest_h3 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM dest_h3 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_h3 #### A masked pattern was here #### 5 1 5102.0 102 2 5 1 5116.0 116 2 5 1 515.0 15 3 5 1 553.0 53 1 5 1 554.0 54 1 5 1 557.0 57 1 6 1 6134.0 134 2 6 1 664.0 64 1 6 1 665.0 65 1 6 1 666.0 66 1 6 1 669.0 69 1 7 1 7144.0 144 2 7 1 7152.0 152 2 7 1 7210.0 210 3 7 1 774.0 74 1 7 1 777.0 77 1 7 1 778.0 78 1 8 1 8166.0 166 2 8 1 8168.0 168 2 8 1 88.0 8 1 8 1 880.0 80 1 8 1 882.0 82 1 8 1 885.0 85 1 8 1 886.0 86 1 8 1 887.0 87 1 9 1 9190.0 190 2 9 1 9194.0 194 2 9 1 9196.0 196 2 9 1 9270.0 270 3 9 1 99.0 9 1 9 1 992.0 92 1 9 1 996.0 96 1 PREHOOK: query: DROP TABLE dest_g2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@dest_g2 PREHOOK: Output: default@dest_g2 POSTHOOK: query: DROP TABLE dest_g2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest_g2 POSTHOOK: Output: default@dest_g2 PREHOOK: query: DROP TABLE dest_g3 PREHOOK: type: DROPTABLE PREHOOK: Input: default@dest_g3 PREHOOK: Output: default@dest_g3 POSTHOOK: query: DROP TABLE dest_g3 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest_g3 POSTHOOK: Output: default@dest_g3 PREHOOK: query: DROP TABLE dest_g4 PREHOOK: type: DROPTABLE PREHOOK: Input: default@dest_g4 PREHOOK: Output: default@dest_g4 POSTHOOK: query: DROP TABLE dest_g4 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest_g4 POSTHOOK: Output: default@dest_g4 PREHOOK: query: DROP TABLE dest_h2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@dest_h2 PREHOOK: Output: default@dest_h2 POSTHOOK: query: DROP TABLE dest_h2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest_h2 POSTHOOK: Output: default@dest_h2 PREHOOK: query: DROP TABLE dest_h3 PREHOOK: type: DROPTABLE PREHOOK: Input: default@dest_h3 PREHOOK: Output: default@dest_h3 POSTHOOK: query: DROP TABLE dest_h3 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest_h3 POSTHOOK: Output: default@dest_h3