PREHOOK: query: -- This file is used to show plans of queries involving cluster by, distribute by,
-- order by, and sort by.
-- Right now, Correlation optimizer check the most restrictive condition
-- when determining if a ReduceSinkOperator is not necessary.
-- This condition is that two ReduceSinkOperators should have same sorting columns,
-- same partitioning columns, same sorting orders and no conflict on the numbers of reducers. 

-- Distribute by will not be optimized because distribute by does not introduce
-- sorting columns.
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: -- This file is used to show plans of queries involving cluster by, distribute by,
-- order by, and sort by.
-- Right now, Correlation optimizer check the most restrictive condition
-- when determining if a ReduceSinkOperator is not necessary.
-- This condition is that two ReduceSinkOperators should have same sorting columns,
-- same partitioning columns, same sorting orders and no conflict on the numbers of reducers. 

-- Distribute by will not be optimized because distribute by does not introduce
-- sorting columns.
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-3
  Stage-3 is a root stage
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: string), _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: string), _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Sort by will not be optimized because sort by does not introduce partitioning columns
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y SORT BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: -- Sort by will not be optimized because sort by does not introduce partitioning columns
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y SORT BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-3
  Stage-3 is a root stage
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Distribute by and sort by on the same key(s) should be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: -- Distribute by and sort by on the same key(s) should be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-3
  Stage-3 is a root stage
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
128	val_128	128	
128	val_128	128	
128	val_128	128	
146	val_146	146	val_146
146	val_146	146	val_146
150	val_150	150	val_150
213	val_213	213	val_213
213	val_213	213	val_213
224	val_224	224	
224	val_224	224	
238	val_238	238	val_238
238	val_238	238	val_238
255	val_255	255	val_255
255	val_255	255	val_255
273	val_273	273	val_273
273	val_273	273	val_273
273	val_273	273	val_273
278	val_278	278	val_278
278	val_278	278	val_278
311	val_311	311	val_311
311	val_311	311	val_311
311	val_311	311	val_311
369	val_369	369	
369	val_369	369	
369	val_369	369	
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
66	val_66	66	val_66
98	val_98	98	val_98
98	val_98	98	val_98
PREHOOK: query: EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Demux Operator
          Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
            Mux Operator
              Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
            Mux Operator
              Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
128	val_128	128	
128	val_128	128	
128	val_128	128	
146	val_146	146	val_146
146	val_146	146	val_146
150	val_150	150	val_150
213	val_213	213	val_213
213	val_213	213	val_213
224	val_224	224	
224	val_224	224	
238	val_238	238	val_238
238	val_238	238	val_238
255	val_255	255	val_255
255	val_255	255	val_255
273	val_273	273	val_273
273	val_273	273	val_273
273	val_273	273	val_273
278	val_278	278	val_278
278	val_278	278	val_278
311	val_311	311	val_311
311	val_311	311	val_311
311	val_311	311	val_311
369	val_369	369	
369	val_369	369	
369	val_369	369	
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
66	val_66	66	val_66
98	val_98	98	val_98
98	val_98	98	val_98
PREHOOK: query: -- Because for join we use ascending order, if sort by uses descending order,
-- this query will not be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key DESC) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key DESC) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: -- Because for join we use ascending order, if sort by uses descending order,
-- this query will not be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key DESC) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key DESC) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-3
  Stage-3 is a root stage
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: -
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: -
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Even if hive.optimize.reducededuplication.min.reducer=1, order by will not be optimized
-- because order by does not introduce partitioning columns
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x ORDER BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y ORDER BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: -- Even if hive.optimize.reducededuplication.min.reducer=1, order by will not be optimized
-- because order by does not introduce partitioning columns
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x ORDER BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y ORDER BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-3
  Stage-3 is a root stage
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Cluster by will be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: -- Cluster by will be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-3
  Stage-3 is a root stage
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
128	val_128	128	
128	val_128	128	
128	val_128	128	
146	val_146	146	val_146
146	val_146	146	val_146
150	val_150	150	val_150
213	val_213	213	val_213
213	val_213	213	val_213
224	val_224	224	
224	val_224	224	
238	val_238	238	val_238
238	val_238	238	val_238
255	val_255	255	val_255
255	val_255	255	val_255
273	val_273	273	val_273
273	val_273	273	val_273
273	val_273	273	val_273
278	val_278	278	val_278
278	val_278	278	val_278
311	val_311	311	val_311
311	val_311	311	val_311
311	val_311	311	val_311
369	val_369	369	
369	val_369	369	
369	val_369	369	
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
66	val_66	66	val_66
98	val_98	98	val_98
98	val_98	98	val_98
PREHOOK: query: EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Demux Operator
          Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
            Mux Operator
              Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
            Mux Operator
              Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx
JOIN
(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
128	val_128	128	
128	val_128	128	
128	val_128	128	
146	val_146	146	val_146
146	val_146	146	val_146
150	val_150	150	val_150
213	val_213	213	val_213
213	val_213	213	val_213
224	val_224	224	
224	val_224	224	
238	val_238	238	val_238
238	val_238	238	val_238
255	val_255	255	val_255
255	val_255	255	val_255
273	val_273	273	val_273
273	val_273	273	val_273
273	val_273	273	val_273
278	val_278	278	val_278
278	val_278	278	val_278
311	val_311	311	val_311
311	val_311	311	val_311
311	val_311	311	val_311
369	val_369	369	
369	val_369	369	
369	val_369	369	
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
401	val_401	401	val_401
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
406	val_406	406	val_406
66	val_66	66	val_66
98	val_98	98	val_98
98	val_98	98	val_98
PREHOOK: query: -- If hive.optimize.reducededuplication.min.reducer=1,
-- group by and then order by should be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: -- If hive.optimize.reducededuplication.min.reducer=1,
-- group by and then order by should be optimized
EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-3
  Stage-3 is a root stage
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count()
                keys: key (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
128	val_128	128	1
128	val_128	128	1
128	val_128	128	1
146	val_146	146	1
146	val_146	146	1
150	val_150	150	1
213	val_213	213	1
213	val_213	213	1
224	val_224	224	1
224	val_224	224	1
238	val_238	238	1
238	val_238	238	1
255	val_255	255	1
255	val_255	255	1
273	val_273	273	1
273	val_273	273	1
273	val_273	273	1
278	val_278	278	1
278	val_278	278	1
311	val_311	311	1
311	val_311	311	1
311	val_311	311	1
369	val_369	369	1
369	val_369	369	1
369	val_369	369	1
401	val_401	401	1
401	val_401	401	1
401	val_401	401	1
401	val_401	401	1
401	val_401	401	1
406	val_406	406	1
406	val_406	406	1
406	val_406	406	1
406	val_406	406	1
66	val_66	66	1
98	val_98	98	1
98	val_98	98	1
PREHOOK: query: EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: x
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string), value (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: string)
          TableScan
            alias: y
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count()
                keys: key (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Demux Operator
          Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
            Mux Operator
              Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Group By Operator
            aggregations: count(VALUE._col0)
            keys: KEY._col0 (type: string)
            mode: mergepartial
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
            Mux Operator
              Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value
FROM
(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx
JOIN
(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy
ON (xx.key=yy.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
128	val_128	128	1
128	val_128	128	1
128	val_128	128	1
146	val_146	146	1
146	val_146	146	1
150	val_150	150	1
213	val_213	213	1
213	val_213	213	1
224	val_224	224	1
224	val_224	224	1
238	val_238	238	1
238	val_238	238	1
255	val_255	255	1
255	val_255	255	1
273	val_273	273	1
273	val_273	273	1
273	val_273	273	1
278	val_278	278	1
278	val_278	278	1
311	val_311	311	1
311	val_311	311	1
311	val_311	311	1
369	val_369	369	1
369	val_369	369	1
369	val_369	369	1
401	val_401	401	1
401	val_401	401	1
401	val_401	401	1
401	val_401	401	1
401	val_401	401	1
406	val_406	406	1
406	val_406	406	1
406	val_406	406	1
406	val_406	406	1
66	val_66	66	1
98	val_98	98	1
98	val_98	98	1