Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| kb:hive [2017/11/26 16:30] – [Migrate CSV to hive table] yehuda | kb:hive [2022/01/03 16:03] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ====== Hive ====== | ====== Hive ====== | ||
| + | https:// | ||
| + | <code sql> | ||
| + | -- | ||
| + | -- In order to support an on-going reconciliation between current records in HIVE and new change records, two tables | ||
| + | -- should be defined: base_table and incremental_table | ||
| + | -- | ||
| + | -- base_table | ||
| + | -- The example below shows DDL for the Hive table “base_table” that will include any delimited files located in | ||
| + | -- HDFS under the ‘/ | ||
| + | -- source system. After the first processing run, it will house the on-going, most up-to-date set of records from the source system: | ||
| + | -- | ||
| + | CREATE TABLE base_table ( | ||
| + | id string, | ||
| + | field1 string, field2 string, field3 string, field4 string, field5 string, | ||
| + | modified_date string | ||
| + | ) | ||
| + | ROW FORMAT DELIMITED | ||
| + | FIELDS TERMINATED BY ',' | ||
| + | LOCATION '/ | ||
| + | |||
| + | -- # | ||
| + | -- The DDL below shows an external Hive table “incremental_table” that will include | ||
| + | -- any delimited files with incremental change records, located in HDFS under the ‘/ | ||
| + | -- directory: | ||
| + | CREATE EXTERNAL TABLE incremental_table ( | ||
| + | id string, | ||
| + | field1 string, field2 string, field3 string, field4 string, field5 string, | ||
| + | modified_date string | ||
| + | ) | ||
| + | ROW FORMAT DELIMITED | ||
| + | FIELDS TERMINATED BY ',' | ||
| + | LOCATION '/ | ||
| + | |||
| + | -- # | ||
| + | -- This view combines record sets from both the Base (base_table) and | ||
| + | -- Change (incremental_table) tables and is reduced only to the most recent records for each unique “id”. | ||
| + | -- It is defined as follows: | ||
| + | CREATE VIEW reconcile_view AS | ||
| + | SELECT t1.* FROM | ||
| + | ( | ||
| + | | ||
| + | UNION ALL | ||
| + | | ||
| + | ) t1 | ||
| + | |||
| + | JOIN | ||
| + | |||
| + | ( | ||
| + | | ||
| + | | ||
| + | ( | ||
| + | SELECT * FROM base_table | ||
| + | UNION ALL | ||
| + | SELECT * FROM incremental_table | ||
| + | ) t2 | ||
| + | GROUP BY id | ||
| + | |||
| + | ) s | ||
| + | ON t1.id = s.id AND t1.modified_date = s.max_modified; | ||
| + | </ | ||
| + | |||
| + | Source: https:// | ||