PREHOOK: query: DROP TABLE flights_tiny PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE flights_tiny POSTHOOK: type: DROPTABLE PREHOOK: query: create table flights_tiny ( ORIGIN_CITY_NAME string, DEST_CITY_NAME string, YEAR int, MONTH int, DAY_OF_MONTH int, ARR_DELAY float, FL_NUM string ) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@flights_tiny POSTHOOK: query: create table flights_tiny ( ORIGIN_CITY_NAME string, DEST_CITY_NAME string, YEAR int, MONTH int, DAY_OF_MONTH int, ARR_DELAY float, FL_NUM string ) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@flights_tiny PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@flights_tiny POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@flights_tiny PREHOOK: query: -- SORT_QUERY_RESULTS -- 1. basic Matchpath test explain select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny distribute by fl_num sort by year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- 1. basic Matchpath test explain select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny distribute by fl_num sort by year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: flights_tiny Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 output shape: tpath: int partition by: _col6 raw input shape: referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny distribute by fl_num sort by year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) PREHOOK: type: QUERY PREHOOK: Input: default@flights_tiny #### A masked pattern was here #### POSTHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny distribute by fl_num sort by year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) POSTHOOK: type: QUERY POSTHOOK: Input: default@flights_tiny #### A masked pattern was here #### Baltimore 1142 2010 10 20 6 20 Baltimore 1142 2010 10 21 5 21 Baltimore 1142 2010 10 22 4 22 Baltimore 1142 2010 10 25 3 25 Baltimore 1142 2010 10 26 2 26 Baltimore 1599 2010 10 21 2 21 Baltimore 1599 2010 10 25 3 25 Baltimore 1599 2010 10 26 2 26 Chicago 1531 2010 10 21 2 21 Chicago 1531 2010 10 25 3 25 Chicago 1531 2010 10 26 2 26 Chicago 361 2010 10 20 2 20 Chicago 897 2010 10 20 4 20 Chicago 897 2010 10 21 3 21 Chicago 897 2010 10 22 2 22 Washington 7291 2010 10 27 2 27 PREHOOK: query: -- 2. Matchpath on 1 partition explain select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) where fl_num = 1142 PREHOOK: type: QUERY POSTHOOK: query: -- 2. Matchpath on 1 partition explain select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) where fl_num = 1142 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: flights_tiny Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 output shape: tpath: int partition by: 0 raw input shape: referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) where fl_num = 1142 PREHOOK: type: QUERY PREHOOK: Input: default@flights_tiny #### A masked pattern was here #### POSTHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) where fl_num = 1142 POSTHOOK: type: QUERY POSTHOOK: Input: default@flights_tiny #### A masked pattern was here #### Baltimore 1142 2010 10 20 6 20 Baltimore 1142 2010 10 21 5 21 Baltimore 1142 2010 10 22 4 22 Baltimore 1142 2010 10 25 3 25 Baltimore 1142 2010 10 26 2 26 PREHOOK: query: -- 3. empty partition. explain select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on (select * from flights_tiny where fl_num = -1142) flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) PREHOOK: type: QUERY POSTHOOK: query: -- 3. empty partition. explain select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on (select * from flights_tiny where fl_num = -1142) flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: flights_tiny Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) outputColumnNames: _col0, _col2, _col3, _col4, _col5 Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col5 (type: float) Reducer 2 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 output shape: tpath: int partition by: 0 raw input shape: referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on (select * from flights_tiny where fl_num = -1142) flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) PREHOOK: type: QUERY PREHOOK: Input: default@flights_tiny #### A masked pattern was here #### POSTHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from matchpath(on (select * from flights_tiny where fl_num = -1142) flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), arg2('LATE'), arg3(arr_delay > 15), arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) POSTHOOK: type: QUERY POSTHOOK: Input: default@flights_tiny #### A masked pattern was here ####