Quick code
-- Pick the Hive execution engine for the current session only
-- (run ONE of these, not both; the second SET overrides the first).
SET hive.execution.engine=mr;
SET hive.execution.engine=tez;
-- NOTE: shell command, not HiveQL — launches the Hive CLI with DEBUG
-- logging sent to the console for troubleshooting.
hive --hiveconf hive.root.logger=DEBUG,console
Adding UDFs
-- Allow the etl_ops role to reference the local UDF jar.
GRANT ALL ON URI 'file:///opt/local/hive/lib/tex*********t.jar' TO ROLE etl_ops;
-- IF EXISTS keeps this script re-runnable when the function is not yet registered.
DROP FUNCTION IF EXISTS etl_db.test_yehuda_hash;
-- Register the UDF from a jar stored on HDFS (visible to all cluster nodes).
CREATE FUNCTION etl_db.test_yehuda_hash AS 'com.*******HashMultiParamsUDF'
USING JAR 'hdfs:///data_lake/udfs/tex***.jar';
-- Smoke test: invoke the UDF once to verify registration (was misspelled et2l_db).
SELECT etl_db.test_yehuda_hash("test","a");
Migrate a CSV file to a Hive table
-- Drop both tables so the whole migration can be re-run from scratch.
-- (Dropping the EXTERNAL table does NOT delete the underlying raw files.)
DROP TABLE IF EXISTS schema.tbl_external_csv;
DROP TABLE IF EXISTS schema.tbl_internal;
--
-- Create external csv reader over the raw files already on HDFS.
--
CREATE EXTERNAL TABLE IF NOT EXISTS schema.tbl_external_csv(
    name     STRING,
    guid     STRING,
    src_date STRING,  -- raw date kept as text; parsed (dd/MM/yyyy) during the load
    `key`    INT      -- backticked: KEY is a reserved word in Hive
)
COMMENT 'tbl_internal external table'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/user/raw_user/raw_files'
TBLPROPERTIES ("skip.header.line.count"="1");  -- skip the CSV header row
--
-- Create tbl_internal: the managed Parquet table holding the cleaned data.
--
CREATE TABLE IF NOT EXISTS schema.tbl_internal (
    name   STRING,
    guid   STRING,
    `date` TIMESTAMP,  -- parsed from src_date during the load
    `key`  INT         -- backticked: KEY is a reserved word in Hive
) STORED AS PARQUET;
--
-- Import data from the external CSV table
--
-- Load the staging rows into the managed table, converting types on the way.
INSERT OVERWRITE TABLE `schema`.`tbl_internal`
SELECT
    `name`,
    `guid`,
    -- Parse the dd/MM/yyyy text date, render it via from_unixtime's default
    -- 'yyyy-MM-dd HH:mm:ss' format, and cast explicitly: a bare 'yyyy-MM-dd'
    -- string does not reliably convert to TIMESTAMP and can load as NULL.
    -- Rows whose src_date fails to parse still load with a NULL `date`.
    CAST(from_unixtime(unix_timestamp(`src_date`, 'dd/MM/yyyy')) AS TIMESTAMP) AS `date`,
    `key`
FROM `schema`.`tbl_external_csv`;
Back to top