Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| kb:hive [2018/11/13 12:30] – yehuda | kb:hive [2022/01/03 16:03] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 4: | Line 4: | ||
| <code sql> | <code sql> | ||
| - | -- In order to support an on-going reconciliation between current records in HIVE and new change records, two tables should be defined: base_table and incremental_table | + | -- |
| + | -- In order to support an on-going reconciliation between current records in HIVE and new change records, two tables | ||
| + | -- should be defined: base_table and incremental_table | ||
| + | -- | ||
| -- base_table | -- base_table | ||
| - | -- The example below shows DDL for the Hive table “base_table” that will include any delimited files located in HDFS under the ‘/ | + | -- The example below shows DDL for the Hive table “base_table” that will include any delimited files located in |
| - | + | -- HDFS under the ‘/ | |
| - | + | -- source system. After the first processing run, it will house the on-going, most up-to-date set of records from the source system: | |
| + | -- | ||
| CREATE TABLE base_table ( | CREATE TABLE base_table ( | ||
| - | |||
| id string, | id string, | ||
| - | + | field1 string, field2 string, field3 string, field4 string, field5 string, | |
| - | field1 string, | + | |
| - | + | ||
| - | field2 string, | + | |
| - | + | ||
| - | field3 string, | + | |
| - | + | ||
| - | field4 string, | + | |
| - | + | ||
| - | field5 string, | + | |
| modified_date string | modified_date string | ||
| - | |||
| ) | ) | ||
| - | |||
| ROW FORMAT DELIMITED | ROW FORMAT DELIMITED | ||
| - | |||
| FIELDS TERMINATED BY ',' | FIELDS TERMINATED BY ',' | ||
| - | |||
| LOCATION '/ | LOCATION '/ | ||
| -- # | -- # | ||
| - | -- The DDL below shows an external Hive table “incremental_table” that will include any delimited files with incremental change records, located in HDFS under the ‘/ | + | -- The DDL below shows an external Hive table “incremental_table” that will include |
| - | + | -- any delimited files with incremental change records, located in HDFS under the ‘/ | |
| - | + | -- directory: | |
| CREATE EXTERNAL TABLE incremental_table ( | CREATE EXTERNAL TABLE incremental_table ( | ||
| - | |||
| id string, | id string, | ||
| - | + | field1 string, field2 string, field3 string, field4 string, field5 string, | |
| - | field1 string, | + | |
| - | + | ||
| - | field2 string, | + | |
| - | + | ||
| - | field3 string, | + | |
| - | + | ||
| - | field4 string, | + | |
| - | + | ||
| - | field5 string, | + | |
| modified_date string | modified_date string | ||
| - | |||
| ) | ) | ||
| - | |||
| ROW FORMAT DELIMITED | ROW FORMAT DELIMITED | ||
| - | |||
| FIELDS TERMINATED BY ',' | FIELDS TERMINATED BY ',' | ||
| - | |||
| LOCATION '/ | LOCATION '/ | ||
| -- # | -- # | ||
| - | -- This view combines record sets from both the Base (base_table) and Change (incremental_table) tables and is reduced only to the most recent records for each unique “id”. | + | -- This view combines record sets from both the Base (base_table) and |
| + | -- Change (incremental_table) tables and is reduced only to the most recent records for each unique “id”. | ||
| -- It is defined as follows: | -- It is defined as follows: | ||
| - | |||
| - | |||
| CREATE VIEW reconcile_view AS | CREATE VIEW reconcile_view AS | ||
| - | |||
| SELECT t1.* FROM | SELECT t1.* FROM | ||
| - | + | ( | |
| - | (SELECT * FROM base_table | + | |
| - | + | | |
| - | UNION ALL | + | |
| - | + | ) t1 | |
| - | SELECT * FROM incremental_table) t1 | + | |
| JOIN | JOIN | ||
| - | (SELECT id, max(modified_date) max_modified FROM | + | ( |
| - | + | SELECT id, max(modified_date) max_modified | |
| - | (SELECT * FROM base_table | + | FROM |
| - | + | ( | |
| - | UNION ALL | + | SELECT * FROM base_table |
| - | + | UNION ALL | |
| - | SELECT * FROM incremental_table) t2 | + | SELECT * FROM incremental_table |
| - | + | ) t2 | |
| - | GROUP BY id) s | + | |
| + | ) s | ||
| ON t1.id = s.id AND t1.modified_date = s.max_modified; | ON t1.id = s.id AND t1.modified_date = s.max_modified; | ||
| </ | </ | ||
| + | |||
| + | Source: https:// | ||