Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
kb:hive [2018/11/13 12:30] – yehuda | kb:hive [2022/01/03 16:03] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 4: | Line 4: | ||
<code sql> | <code sql> | ||
- | -- In order to support an on-going reconciliation between current records in HIVE and new change records, two tables should be defined: base_table and incremental_table | + | -- |
+ | -- In order to support an on-going reconciliation between current records in HIVE and new change records, two tables | ||
+ | -- should be defined: base_table and incremental_table | ||
+ | -- | ||
-- base_table | -- base_table | ||
- | -- The example below shows DDL for the Hive table “base_table” that will include any delimited files located in HDFS under the ‘/ | + | -- The example below shows DDL for the Hive table “base_table” that will include any delimited files located in |
- | + | -- HDFS under the ‘/ | |
- | + | -- source system. After the first processing run, it will house the on-going, most up-to-date set of records from the source system: | |
+ | -- | ||
CREATE TABLE base_table ( | CREATE TABLE base_table ( | ||
- | |||
id string, | id string, | ||
- | + | field1 string, field2 string, field3 string, field4 string, field5 string, | |
- | field1 string, | + | |
- | + | ||
- | field2 string, | + | |
- | + | ||
- | field3 string, | + | |
- | + | ||
- | field4 string, | + | |
- | + | ||
- | field5 string, | + | |
modified_date string | modified_date string | ||
- | |||
) | ) | ||
- | |||
ROW FORMAT DELIMITED | ROW FORMAT DELIMITED | ||
- | |||
FIELDS TERMINATED BY ',' | FIELDS TERMINATED BY ',' | ||
- | |||
LOCATION '/ | LOCATION '/ | ||
-- # | -- # | ||
- | -- The DDL below shows an external Hive table “incremental_table” that will include any delimited files with incremental change records, located in HDFS under the ‘/ | + | -- The DDL below shows an external Hive table “incremental_table” that will include |
- | + | -- any delimited files with incremental change records, located in HDFS under the ‘/ | |
- | + | -- directory: | |
CREATE EXTERNAL TABLE incremental_table ( | CREATE EXTERNAL TABLE incremental_table ( | ||
- | |||
id string, | id string, | ||
- | + | field1 string, field2 string, field3 string, field4 string, field5 string, | |
- | field1 string, | + | |
- | + | ||
- | field2 string, | + | |
- | + | ||
- | field3 string, | + | |
- | + | ||
- | field4 string, | + | |
- | + | ||
- | field5 string, | + | |
modified_date string | modified_date string | ||
- | |||
) | ) | ||
- | |||
ROW FORMAT DELIMITED | ROW FORMAT DELIMITED | ||
- | |||
FIELDS TERMINATED BY ',' | FIELDS TERMINATED BY ',' | ||
- | |||
LOCATION '/ | LOCATION '/ | ||
-- # | -- # | ||
- | -- This view combines record sets from both the Base (base_table) and Change (incremental_table) tables and is reduced only to the most recent records for each unique “id”. | + | -- This view combines record sets from both the Base (base_table) and |
+ | -- Change (incremental_table) tables and is reduced only to the most recent records for each unique “id”. | ||
-- It is defined as follows: | -- It is defined as follows: | ||
- | |||
- | |||
CREATE VIEW reconcile_view AS | CREATE VIEW reconcile_view AS | ||
- | |||
SELECT t1.* FROM | SELECT t1.* FROM | ||
- | + | ( | |
- | (SELECT * FROM base_table | + | |
- | + | | |
- | UNION ALL | + | |
- | + | ) t1 | |
- | SELECT * FROM incremental_table) t1 | + | |
JOIN | JOIN | ||
- | (SELECT id, max(modified_date) max_modified FROM | + | ( |
- | + | SELECT id, max(modified_date) max_modified | |
- | (SELECT * FROM base_table | + | FROM |
- | + | ( | |
- | UNION ALL | + | SELECT * FROM base_table |
- | + | UNION ALL | |
- | SELECT * FROM incremental_table) t2 | + | SELECT * FROM incremental_table |
- | + | ) t2 | |
- | GROUP BY id) s | + | |
+ | ) s | ||
ON t1.id = s.id AND t1.modified_date = s.max_modified; | ON t1.id = s.id AND t1.modified_date = s.max_modified; | ||
</ | </ | ||
+ | |||
+ | Source: https:// |