PREHOOK: query: -- hash aggregation is disabled

-- There are different cases for Group By depending on map/reduce side, hash aggregation,
-- grouping sets and column stats. If we don't have column stats, we just assume hash
-- aggregation is disabled. Following are the possible cases and rule for cardinality
-- estimation

-- MAP SIDE:
-- Case 1: NO column stats, NO hash aggregation, NO grouping sets — numRows
-- Case 2: NO column stats, NO hash aggregation, grouping sets — numRows * sizeOfGroupingSet
-- Case 3: column stats, hash aggregation, NO grouping sets — Min(numRows / 2, ndvProduct * parallelism)
-- Case 4: column stats, hash aggregation, grouping sets — Min((numRows * sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
-- Case 5: column stats, NO hash aggregation, NO grouping sets — numRows
-- Case 6: column stats, NO hash aggregation, grouping sets — numRows * sizeOfGroupingSet

-- REDUCE SIDE:
-- Case 7: NO column stats — numRows / 2
-- Case 8: column stats, grouping sets — Min(numRows, ndvProduct * sizeOfGroupingSet)
-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct)

create table if not exists loc_staging (
  state string,
  locid int,
  zip bigint,
  year int
) row format delimited fields terminated by '|' stored as textfile
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@loc_staging
POSTHOOK: query: -- hash aggregation is disabled

-- There are different cases for Group By depending on map/reduce side, hash aggregation,
-- grouping sets and column stats. If we don't have column stats, we just assume hash
-- aggregation is disabled. Following are the possible cases and rule for cardinality
-- estimation

-- MAP SIDE:
-- Case 1: NO column stats, NO hash aggregation, NO grouping sets — numRows
-- Case 2: NO column stats, NO hash aggregation, grouping sets — numRows * sizeOfGroupingSet
-- Case 3: column stats, hash aggregation, NO grouping sets — Min(numRows / 2, ndvProduct * parallelism)
-- Case 4: column stats, hash aggregation, grouping sets — Min((numRows * sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
-- Case 5: column stats, NO hash aggregation, NO grouping sets — numRows
-- Case 6: column stats, NO hash aggregation, grouping sets — numRows * sizeOfGroupingSet

-- REDUCE SIDE:
-- Case 7: NO column stats — numRows / 2
-- Case 8: column stats, grouping sets — Min(numRows, ndvProduct * sizeOfGroupingSet)
-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct)

create table if not exists loc_staging (
  state string,
  locid int,
  zip bigint,
  year int
) row format delimited fields terminated by '|' stored as textfile
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@loc_staging
PREHOOK: query: create table loc_orc like loc_staging
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@loc_orc
POSTHOOK: query: create table loc_orc like loc_staging
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@loc_orc
PREHOOK: query: alter table loc_orc set fileformat orc
PREHOOK: type: ALTERTABLE_FILEFORMAT
PREHOOK: Input: default@loc_orc
PREHOOK: Output: default@loc_orc
POSTHOOK: query: alter table loc_orc set fileformat orc
POSTHOOK: type: ALTERTABLE_FILEFORMAT
POSTHOOK: Input: default@loc_orc
POSTHOOK: Output: default@loc_orc
PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@loc_staging
POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@loc_staging
PREHOOK: query: insert overwrite table loc_orc select * from loc_staging
PREHOOK: type: QUERY
PREHOOK: Input: default@loc_staging
PREHOOK: Output: default@loc_orc
POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging
POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_staging
POSTHOOK: Output: default@loc_orc
POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
PREHOOK: query: -- numRows: 8 rawDataSize: 796
explain select * from loc_orc
PREHOOK: type: QUERY
POSTHOOK: query: -- numRows: 8 rawDataSize: 796
explain select * from loc_orc
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        TableScan
          alias: loc_orc
          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
            outputColumnNames: _col0, _col1, _col2, _col3
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            ListSink

PREHOOK: query: -- partial column stats
analyze table loc_orc compute statistics for columns state
PREHOOK: type: QUERY
PREHOOK: Input: default@loc_orc
#### A masked pattern was here ####
POSTHOOK: query: -- partial column stats
analyze table loc_orc compute statistics for columns state
POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
PREHOOK: query: -- inner group by: map - numRows: 8 reduce - numRows: 4
-- outer group by: map - numRows: 4 reduce numRows: 2
explain select a, c, min(b)
from ( select state as a, locid as b, count(*) as c
       from loc_orc
       group by state,locid
     ) sq1
group by a,c
PREHOOK: type: QUERY
POSTHOOK: query: -- inner group by: map - numRows: 8 reduce - numRows: 4
-- outer group by: map - numRows: 4 reduce numRows: 2
explain select a, c, min(b)
from ( select state as a, locid as b, count(*) as c
       from loc_orc
       group by state,locid
     ) sq1
group by a,c
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
              Group By Operator
                aggregations: count()
                keys: _col0 (type: string), _col1 (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
                  Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
                  value expressions: _col2 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: int)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
          Select Operator
            expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: int)
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
            Group By Operator
              aggregations: min(_col2)
              keys: _col0 (type: string), _col1 (type: bigint)
              mode: hash
              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
              File Output Operator
                compressed: false
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string), _col1 (type: bigint)
              sort order: ++
              Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
              Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
              value expressions: _col2 (type: int)
      Reduce Operator Tree:
        Group By Operator
          aggregations: min(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
          File Output Operator
            compressed: false
            Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year
PREHOOK: type: QUERY
PREHOOK: Input: default@loc_orc
#### A masked pattern was here ####
POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year
POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: year (type: int)
              outputColumnNames: _col0
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: int)
                mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: int)
                  Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: int)
          mode: mergepartial
          outputColumnNames: _col0
          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 9: column stats, NO grouping sets - caridnality = 8
explain select state,locid from loc_orc group by state,locid
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 9: column stats, NO grouping sets - caridnality = 8
explain select state,locid from loc_orc group by state,locid
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int)
                mode: hash
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
                  Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid with rollup
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid with rollup
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 8: column stats, grouping sets - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state))
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 8: column stats, grouping sets - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 16
-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 16
-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- map-side parallelism will be 10

-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
PREHOOK: type: QUERY
POSTHOOK: query: -- map-side parallelism will be 10

-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: year (type: int)
              outputColumnNames: _col0
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: int)
                mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: int)
                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: int)
          mode: mergepartial
          outputColumnNames: _col0
          Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - cardinality = 16
-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - cardinality = 16
-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats
-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
-- Case 9: column stats, NO grouping sets - cardinality = 2
explain select state,zip from loc_orc group by state,zip
PREHOOK: type: QUERY
POSTHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats
-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
-- Case 9: column stats, NO grouping sets - cardinality = 2
explain select state,zip from loc_orc group by state,zip
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
            Select Operator
              expressions: state (type: string), zip (type: bigint)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
              Group By Operator
                keys: _col0 (type: string), _col1 (type: bigint)
                mode: hash
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: bigint)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
                  Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
          File Output Operator
            compressed: false
            Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid with rollup
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid with rollup
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
explain select state,locid from loc_orc group by state,locid grouping sets((state))
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
explain select state,locid from loc_orc group by state,locid grouping sets((state))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 16
-- Case 7: NO column stats - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 16
-- Case 7: NO column stats - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
explain select year from loc_orc group by year
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
explain select year from loc_orc group by year
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: year (type: int)
              outputColumnNames: _col0
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: int)
                mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: int)
                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: int)
          mode: mergepartial
          outputColumnNames: _col0
          Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32
-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
          pruneGroupingSetId: true
          File Output Operator
            compressed: false
            Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink