This is an old revision of the document!


Hive

https://hortonworks.com/blog/four-step-strategy-incremental-updates-hive/

-- In order to support an on-going reconciliation between current records in HIVE and new change records, two tables should be defined: base_table and incremental_table
-- base_table
-- The example below shows DDL for the Hive table “base_table” that will include any delimited files located in HDFS under the ‘/user/hive/base_table’ directory. This table will house the  initial, complete record load from the source system. After the first processing run, it will house the on-going, most up-to-date set of records from the source system:
 
 
 
CREATE TABLE base_table (
 
id string,
 
field1 string,
 
field2 string,
 
field3 string,
 
field4 string,
 
field5 string,
 
modified_date string
 
)
 
ROW FORMAT DELIMITED
 
FIELDS TERMINATED BY ','
 
LOCATION '/user/hive/base_table';
 
-- #incremental_table
-- The DDL below shows an external Hive table “incremental_table” that will include any delimited files with incremental change records, located in HDFS under the ‘/user/hive/incremental_append’ -- directory:
 
 
 
CREATE EXTERNAL TABLE incremental_table (
 
id string,
 
field1 string,
 
field2 string,
 
field3 string,
 
field4 string,
 
field5 string,
 
modified_date string
 
)
 
ROW FORMAT DELIMITED
 
FIELDS TERMINATED BY ','
 
LOCATION '/user/hive/incremental_table';
 
-- #reconcile_view
-- This view combines record sets from both the Base (base_table) and Change (incremental_table) tables and is reduced only to the most recent records for each unique “id”. 
-- It is defined as follows:
 
 
CREATE VIEW reconcile_view AS
 
SELECT t1.* FROM
 
(SELECT * FROM base_table
 
UNION ALL
 
SELECT * FROM incremental_table) t1
 
JOIN
 
(SELECT id, MAX(modified_date) max_modified FROM
 
(SELECT * FROM base_table
 
UNION ALL
 
SELECT * FROM incremental_table) t2
 
GROUP BY id) s
 
ON t1.id = s.id AND t1.modified_date = s.max_modified;
kb/hive.1542112204.txt.gz · Last modified: (external edit)
Back to top
Driven by DokuWiki Recent changes RSS feed Valid CSS Valid XHTML 1.0