Hive operations cheat sheet — Table of Contents

Quick commands (switching execution engine, debug logging)

-- Switch the Hive execution engine for the current session: classic MapReduce...
SET hive.execution.engine=mr; 
 
-- ...or Tez (faster DAG-based engine, if installed on the cluster).
SET hive.execution.engine=tez;
-- Shell command (not HiveQL): launch the Hive CLI with DEBUG logging sent to the console.
hive --hiveconf hive.root.logger=DEBUG,console

Adding UDFs (grant jar access, register the function, smoke-test it)

--
-- Allow the etl_ops role to load the UDF jar from the local filesystem
-- (required by Sentry/Ranger-style authorization before CREATE FUNCTION).
--
GRANT ALL ON URI 'file:///opt/local/hive/lib/tex*********t.jar' TO ROLE etl_ops;

-- Re-register cleanly: IF EXISTS makes the drop idempotent on first run.
DROP FUNCTION IF EXISTS etl_db.test_yehuda_hash;

CREATE FUNCTION etl_db.test_yehuda_hash AS 'com.*******HashMultiParamsUDF'
USING JAR 'hdfs:///data_lake/udfs/tex***.jar';

-- Smoke test. NOTE: the schema must match the CREATE FUNCTION above
-- (etl_db — the original note had a typo, "et2l_db", which would fail).
SELECT etl_db.test_yehuda_hash("test", "a");

Migrate a CSV file into a Hive (parquet-backed) table

-- Idempotent cleanup: drop the staging (external) and final (internal) tables
-- before re-creating them below. Dropping the EXTERNAL table does not delete
-- the raw CSV files themselves.
DROP TABLE IF EXISTS schema.tbl_external_csv;
DROP TABLE IF EXISTS schema.tbl_internal;
 
--
-- Create external CSV reader over the raw files directory.
-- EXTERNAL: Hive only reads the files at LOCATION; dropping the table
-- leaves the data in place.
--
CREATE EXTERNAL TABLE IF NOT EXISTS schema.tbl_external_csv(
    name     STRING,
    guid     STRING,
    src_date STRING,        -- raw date string in 'dd/MM/yyyy' form; parsed on import
    `key`    INT)           -- KEY is a Hive reserved word: must be back-quoted
COMMENT 'tbl_internal external table'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/user/raw_user/raw_files'
TBLPROPERTIES ("skip.header.line.count"="1");  -- skip the CSV header row
 
--
-- Create tbl_internal: the final parquet-backed table.
-- Both DATE and KEY are Hive reserved words, so both columns are
-- back-quoted (the original quoted `date` but left KEY bare, which
-- fails when reserved-keyword enforcement is on).
--
CREATE TABLE IF NOT EXISTS schema.tbl_internal (
    name   STRING,
    guid   STRING,
    `date` TIMESTAMP,
    `key`  INT
) STORED AS PARQUET;
 
--
-- Import data from the external staging table.
-- src_date arrives as 'dd/MM/yyyy'; it is parsed with unix_timestamp and
-- re-rendered with a FULL timestamp pattern so the string casts cleanly
-- into the TIMESTAMP `date` column — a date-only 'yyyy-MM-dd' string
-- (as in the original) casts to NULL in many Hive versions.
--
INSERT OVERWRITE TABLE `schema`.`tbl_internal`
SELECT
    `name`,
    `guid`,
    from_unixtime(unix_timestamp(`src_date`, 'dd/MM/yyyy'), 'yyyy-MM-dd HH:mm:ss') AS `date`,
    `key`
FROM `schema`.`tbl_external_csv`;