<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="FeedCreator 1.8" -->
<?xml-stylesheet href="https://wiki.korotkin.co.il/lib/exe/css.php?s=feed" type="text/css"?>
<rdf:RDF
    xmlns="http://purl.org/rss/1.0/"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
    xmlns:dc="http://purl.org/dc/elements/1.1/">
    <channel rdf:about="https://wiki.korotkin.co.il/feed.php">
        <title>KB - kb:bigdata</title>
        <description></description>
        <link>https://wiki.korotkin.co.il/</link>
        <image rdf:resource="https://wiki.korotkin.co.il/_media/wiki/logo.png" />
        <dc:date>2026-04-30T00:02:43+00:00</dc:date>
        <items>
            <rdf:Seq>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/apache_phoenix?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/cassandra?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/eco-system?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/file_types?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/hive?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/hortonworks?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/knox?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/nifi?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/spark?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/sqoop?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/bigdata/yarn?rev=1641225830&amp;do=diff"/>
            </rdf:Seq>
        </items>
    </channel>
    <image rdf:about="https://wiki.korotkin.co.il/_media/wiki/logo.png">
        <title>KB</title>
        <link>https://wiki.korotkin.co.il/</link>
        <url>https://wiki.korotkin.co.il/_media/wiki/logo.png</url>
    </image>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/apache_phoenix?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>apache_phoenix</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/apache_phoenix?rev=1641225830&amp;do=diff</link>
        <description>Apache Phoenix

Update hbase table


CREATE VIEW &quot;users_data&quot; 
( 
    ROWKEY VARCHAR PRIMARY KEY, 
    &quot;personal_info&quot;.&quot;firstName&quot; VARCHAR, 
    &quot;personal_info&quot;.&quot;lastName&quot; VARCHAR, 
    &quot;personal_info&quot;.&quot;gender&quot; VARCHAR, 
    &quot;contact_info&quot;.&quot;mail&quot; VARCHAR, 
    &quot;contact_info&quot;.&quot;mobile&quot; VARCHAR ) ;</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/cassandra?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>cassandra</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/cassandra?rev=1641225830&amp;do=diff</link>
        <description>Cassandra

DBeaver GUI tool</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/eco-system?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>eco-system</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/eco-system?rev=1641225830&amp;do=diff</link>
        <description>BigData ecoSystem

and tools etc

	*  Apache Ozone</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/file_types?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>file_types</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/file_types?rev=1641225830&amp;do=diff</link>
        <description>File types

Spark + Parquet In Depth
File Format Benchmark Avro JSON ORC and Parquet
Berlin buzzwords18: Owen O&#039;Malley – Fast Access To Your Complex Data - Avro, JSON, ORC, and Parquet

Parquet

	*  Better Column selecting
	*  Columnar format
	*  Binary format
	*  Encoded &amp; Compressed
	*  Support schema evolution - Format supports

Limitation:

	*  Pushdown filters don&#039;t work on String / Binary (source)
	*  Write speed tradeoff</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/hive?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hive</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/hive?rev=1641225830&amp;do=diff</link>
        <description>Quick code


set hive.execution.engine=mr; 

set hive.execution.engine=tez;



hive --hiveconf hive.root.logger=DEBUG,console


ADDING UDFs


grant all on uri &#039;file:///opt/local/hive/lib/tex*********t.jar&#039; to role etl_ops

drop function etl_db.test_yehuda_hash

create function etl_db.test_yehuda_hash as &#039;com.*******HashMultiParamsUDF&#039; 
using jar &#039;hdfs:///data_lake/udfs/tex***.jar&#039;;


select etl_db.test_yehuda_hash(&quot;test&quot;,&quot;a&quot;)</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/hortonworks?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hortonworks</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/hortonworks?rev=1641225830&amp;do=diff</link>
        <description>Hortonworks

hdp passwords</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/knox?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>knox</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/knox?rev=1641225830&amp;do=diff</link>
        <description>Knox

	*  Add a service
	*  HDP 2.6.5 Knox documentation</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/nifi?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>nifi</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/nifi?rev=1641225830&amp;do=diff</link>
        <description>Apache NiFi

Nice presentations:

	*  &lt;https://www.slideshare.net/Hadoop_Summit/best-practices-and-lessons-learnt-from-running-apache-nifi-at-renault&gt;</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/spark?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>spark</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/spark?rev=1641225830&amp;do=diff</link>
        <description>Spark

Snippets

	*  Import Json
	*  ML Save and Load model
	*  Spark Scala and Maven pom.xml
	*  PySpark and HBase

Spark ML

	*  spark-mllib-pipelines-persistence
	*  ML Pipeline</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/sqoop?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>sqoop</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/sqoop?rev=1641225830&amp;do=diff</link>
        <description>Sqoop

TeraData sqoop

Sqoop from teradata
 import from teradata</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/kb/bigdata/yarn?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>yarn</title>
        <link>https://wiki.korotkin.co.il/kb/bigdata/yarn?rev=1641225830&amp;do=diff</link>
        <description>Yarn

Snippets

Show stdout logs


# located /usr/bin/yarn
yarn logs --applicationId application_1513381650251_42086</description>
    </item>
</rdf:RDF>
