PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tab_part POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab_part PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin_part PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin_part PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: Output: default@tab_part@ds=2008-04-08 POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_part POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: Output: default@tab_part@ds=2008-04-08 POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tab POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: explain select count(*) from tab s1 join tab s3 on s1.key=s3.key PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) from tab s1 join tab s3 on s1.key=s3.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: s3 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: explain select count(*) from (select rt1.id from (select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 join (select rt2.id from (select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 where vt1.id=vt2.id PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) from (select rt1.id from (select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 join (select rt2.id from (select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 where vt1.id=vt2.id POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = _col1) (type: boolean) Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select count(*) from (select rt1.id from (select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 join (select rt2.id from (select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 where vt1.id=vt2.id PREHOOK: type: QUERY PREHOOK: Input: default@tab PREHOOK: Input: default@tab@ds=2008-04-08 PREHOOK: Input: default@tab_part PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: select count(*) from (select rt1.id from (select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 join (select rt2.id from (select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 where vt1.id=vt2.id POSTHOOK: type: QUERY POSTHOOK: Input: default@tab POSTHOOK: Input: default@tab@ds=2008-04-08 POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### 480