Quick code
-- Switch the execution engine for the current session
SET hive.execution.engine=mr;
SET hive.execution.engine=tez;
hive --hiveconf hive.root.logger=DEBUG,console
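To check what a property is currently set to before launching a job, issue SET with just the property name; SET -v dumps every Hive and Hadoop variable for the session. Both are standard Hive CLI behavior:

-- Print the current value of a single property
SET hive.execution.engine;
-- Dump all session configuration (Hive and Hadoop)
SET -v;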
Adding UDFs
-- Allow the role to reference the jar file (Sentry-style URI grant)
GRANT ALL ON URI 'file:///opt/local/hive/lib/tex*********t.jar' TO ROLE etl_ops;

-- Re-create the function so it picks up the jar from HDFS
DROP FUNCTION IF EXISTS etl_db.test_yehuda_hash;
CREATE FUNCTION etl_db.test_yehuda_hash AS 'com.*******HashMultiParamsUDF'
USING JAR 'hdfs:///data_lake/udfs/tex***.jar';

-- Smoke test
SELECT etl_db.test_yehuda_hash("test", "a");
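For quick iteration, a session-scoped temporary function avoids the GRANT and the permanent metastore entry; it disappears when the session ends. A minimal sketch reusing the same (partially redacted) jar and class names from above:

-- Session-only alternative: no database qualifier, no permanent registration
ADD JAR hdfs:///data_lake/udfs/tex***.jar;
CREATE TEMPORARY FUNCTION test_yehuda_hash AS 'com.*******HashMultiParamsUDF';
SELECT test_yehuda_hash("test", "a");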
Migrate CSV to Hive table
DROP TABLE IF EXISTS schema.tbl_external_csv;
DROP TABLE IF EXISTS schema.tbl_internal;

--
-- Create external CSV reader
-- (`key` and `date` are backticked because they collide with Hive keywords)
--
CREATE EXTERNAL TABLE IF NOT EXISTS schema.tbl_external_csv (
  name     STRING,
  guid     STRING,
  src_date STRING,
  `key`    INT
)
COMMENT 'tbl_internal external table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/user/raw_user/raw_files'
TBLPROPERTIES ("skip.header.line.count"="1");

--
-- Create tbl_internal
--
CREATE TABLE IF NOT EXISTS schema.tbl_internal (
  name   STRING,
  guid   STRING,
  `date` TIMESTAMP,
  `key`  INT
)
STORED AS PARQUET;

--
-- Import data from external, converting src_date (dd/MM/yyyy) into the date column
--
INSERT OVERWRITE TABLE `schema`.`tbl_internal`
SELECT
  `name`,
  `guid`,
  from_unixtime(unix_timestamp(`src_date`, 'dd/MM/yyyy'), 'yyyy-MM-dd') AS `date`,
  `key`
FROM `schema`.`tbl_external_csv`;
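Once the INSERT finishes, it is worth sanity-checking that the row counts line up and that the dd/MM/yyyy parse succeeded; unix_timestamp() returns NULL for unparseable input, so NULL dates flag bad rows. The queries below assume the tables created above:

-- Row counts should match (the header line is already excluded by skip.header.line.count)
SELECT COUNT(*) FROM schema.tbl_external_csv;
SELECT COUNT(*) FROM schema.tbl_internal;
-- Rows whose src_date failed to parse end up with a NULL date
SELECT COUNT(*) FROM schema.tbl_internal WHERE `date` IS NULL;
-- Eyeball a few converted rows
SELECT `name`, `date` FROM schema.tbl_internal LIMIT 10;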