====== Quick code ======

Switch the execution engine for the current session:

set hive.execution.engine=mr;
set hive.execution.engine=tez;

Start the Hive CLI with debug logging sent to the console:

hive --hiveconf hive.root.logger=DEBUG,console

===== Adding UDFs =====

Grant the role access to the jar URI (Sentry-style syntax), then (re)register and smoke-test the function:

grant all on uri 'file:///opt/local/hive/lib/tex*********t.jar' to role etl_ops;

drop function if exists etl_db.test_yehuda_hash;

create function etl_db.test_yehuda_hash
  as 'com.*******HashMultiParamsUDF'
  using jar 'hdfs:///data_lake/udfs/tex***.jar';

select etl_db.test_yehuda_hash("test", "a");

===== Migrate CSV to Hive table =====

DROP TABLE IF EXISTS schema.tbl_external_csv;
DROP TABLE IF EXISTS schema.tbl_internal;

--
-- Create the external CSV reader table
--
CREATE EXTERNAL TABLE IF NOT EXISTS schema.tbl_external_csv (
  name     STRING,
  guid     STRING,
  src_date STRING,
  key      INT)
COMMENT 'tbl_internal external table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/user/raw_user/raw_files'
TBLPROPERTIES ("skip.header.line.count"="1");

--
-- Create tbl_internal
--
CREATE TABLE IF NOT EXISTS schema.tbl_internal (
  name   STRING,
  guid   STRING,
  `date` TIMESTAMP,
  key    INT
)
STORED AS PARQUET;

--
-- Import data from the external table; from_unixtime with its default
-- output pattern ('yyyy-MM-dd HH:mm:ss') casts cleanly to TIMESTAMP
--
INSERT OVERWRITE TABLE `schema`.`tbl_internal`
SELECT
  `name`,
  `guid`,
  from_unixtime(unix_timestamp(`src_date`, 'dd/MM/yyyy')) AS `date`,
  `key`
FROM `schema`.`tbl_external_csv`;
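===== Sanity checks =====

To confirm which engine the session ended up on, issue set with just the property name and the Hive CLI echoes its current value:

set hive.execution.engine;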
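A quick way to check that the UDF above registered correctly is to describe it; on setups with more than one HiveServer2 instance, reload function (Hive 1.2+) picks up permanent functions created through another instance:

describe function extended etl_db.test_yehuda_hash;
reload function;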
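After the CSV import, a minimal sanity check (using the table names from above) is to compare row counts and look for NULL dates, since unix_timestamp returns NULL for values that don't match the 'dd/MM/yyyy' pattern:

select count(*) from schema.tbl_external_csv;
select count(*) from schema.tbl_internal;
select count(*) from schema.tbl_internal where `date` is null;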