<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="FeedCreator 1.8" -->
<?xml-stylesheet href="https://wiki.korotkin.co.il/lib/exe/css.php?s=feed" type="text/css"?>
<rdf:RDF
    xmlns="http://purl.org/rss/1.0/"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
    xmlns:dc="http://purl.org/dc/elements/1.1/">
    <channel rdf:about="https://wiki.korotkin.co.il/feed.php">
        <title>KB - learn:bigdata</title>
        <description></description>
        <link>https://wiki.korotkin.co.il/</link>
        <image rdf:resource="https://wiki.korotkin.co.il/_media/wiki/logo.png" />
        <dc:date>2026-04-09T12:39:00+00:00</dc:date>
        <items>
            <rdf:Seq>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/ambari?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/avro?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/cassandra?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/chukwa?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/crunch?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/data_block?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/data_blocks?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/data_node?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/drill?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/edit_log?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/flume?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/fsimage?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/hadoop?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/hama?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/hbase?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/hcatalog?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/hdfs?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/hdt?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/hive?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/ibm_biginsights?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/install_hadoop_eco_system_single_mode?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/job_client?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/job_tracker?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/konx?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/lucene?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/mahout?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/mapreduce?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/name_node?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/oozie?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/pig?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/replication_factor?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/secondary_name_node?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/spark?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/sqoop?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/task_tracker?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/thrift?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/yarn?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/learn/bigdata/zookeeper?rev=1641225830&amp;do=diff"/>
            </rdf:Seq>
        </items>
    </channel>
    <image rdf:about="https://wiki.korotkin.co.il/_media/wiki/logo.png">
        <title>KB</title>
        <link>https://wiki.korotkin.co.il/</link>
        <url>https://wiki.korotkin.co.il/_media/wiki/logo.png</url>
    </image>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/ambari?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ambari</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/ambari?rev=1641225830&amp;do=diff</link>
        <description>ambari

Install across nodes and monitor</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/avro?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>avro</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/avro?rev=1641225830&amp;do=diff</link>
        <description>Avro

Data exchange. Generic for Hadoop</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/cassandra?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>cassandra</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/cassandra?rev=1641225830&amp;do=diff</link>
        <description>Cassandra

NoSQL database</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/chukwa?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>chukwa</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/chukwa?rev=1641225830&amp;do=diff</link>
        <description>chukwa

Real-time log processing tools</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/crunch?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>crunch</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/crunch?rev=1641225830&amp;do=diff</link>
        <description>Crunch

Running MapReduce Pipelines,
Join and aggregate data</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/data_block?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>data_block</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/data_block?rev=1641225830&amp;do=diff</link>
        <description>Data Block

Data (e.g. a file) is split into Data blocks.</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/data_blocks?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>data_blocks</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/data_blocks?rev=1641225830&amp;do=diff</link>
        <description>Data blocks

	*  In Hadoop the default block size is 64MB
	*  Block placement puts the blocks of a single file near each other, defining the distance by bandwidth.</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/data_node?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>data_node</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/data_node?rev=1641225830&amp;do=diff</link>
        <description>Data Nodes

	*  Block OPs
	*  Replications
	*  Communicate directly with clients
	*  Read and writes data to hard disk
	*  Data node sends a heartbeat every 3 seconds to the Name node</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/drill?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>drill</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/drill?rev=1641225830&amp;do=diff</link>
        <description>Drill

Nested data investigation</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/edit_log?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>edit_log</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/edit_log?rev=1641225830&amp;do=diff</link>
        <description>Edit log

 (aka Journal) Track any client changes of information</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/flume?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>flume</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/flume?rev=1641225830&amp;do=diff</link>
        <description>flume

Real-time log processing tools</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/fsimage?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>fsimage</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/fsimage?rev=1641225830&amp;do=diff</link>
        <description>fsImage

see Name node and Secondary name node

fsImage is a file that can be loaded into RAM and contains the mapping of Data blocks on Data nodes

Structure

	*  Snapshot of entire file system
		*  Tracks all the files
		*  Tracks Replication value - can be configured by Replication factor</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/hadoop?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hadoop</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/hadoop?rev=1641225830&amp;do=diff</link>
        <description>Hadoop

Hadoop is a framework that has a set of tools to distribute and process data over clusters.

	*  Scalable
	*  Flexible
	*  Fault-tolerant
	*  Intelligent

Main tools are HDFS (HaDoop File System), MapReduce and YARN

Installing Hadoop

Single node Cluster

	*  Standalone mode - all hadoop components run under single</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/hama?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hama</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/hama?rev=1641225830&amp;do=diff</link>
        <description>Hama

Bulk sync processing</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/hbase?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hbase</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/hbase?rev=1641225830&amp;do=diff</link>
        <description>HBase

Tools

	*  &lt;https://github.com/NiceSystems/hrider/wiki&gt;

Speed-up HBase


# Disable WAL
LOAD HBASE DATA INPATH &#039;somewhere/data.txt&#039; DELIMITED FIELDS
TERMINATED BY &#039;|&#039; INTO TABLE ORDERS DISABLE WAL

# Inc. write buffer
set hbase.client.write.buffer = 8388600</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/hcatalog?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hcatalog</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/hcatalog?rev=1641225830&amp;do=diff</link>
        <description>HCatalog

Meta table - shared schemas</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/hdfs?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hdfs</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/hdfs?rev=1641225830&amp;do=diff</link>
        <description>HDFS (HaDoop File System)

HDFS is one of the utilities included in the Hadoop framework.

	*  HDFS can handle a large file (e.g. 1 PB), split it into Data blocks and distribute them over the Cluster.
	*  HDFS has Fault tolerance - HDFS replicate same Data block on X Data nodes. X is  Replication factor
	*  HDFS Master / Slaves architecture</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/hdt?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hdt</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/hdt?rev=1641225830&amp;do=diff</link>
        <description>HDT

Hadoop development tool integrated with Eclipse</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/hive?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hive</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/hive?rev=1641225830&amp;do=diff</link>
        <description>Hive

HiveQL - similar to SQL.</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/ibm_biginsights?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ibm_biginsights</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/ibm_biginsights?rev=1641225830&amp;do=diff</link>
        <description># Generate root key
ssh-keygen -t rsa

#copy key
ssh-copy-id root@bi

# should be installed
yum -y install expect mksh.x86_64 libaio compat-libstdc++-33 pam.i686 ksh rpm-build ntp
chkconfig ntpd on
ntpdate 0.asia.pool.ntp.org
echo &quot;0.000&quot; &gt; /var/lib/ntp/drift
service ntpd start

# should be password less 
chkconfig iptables off
service iptables stop
chkconfig ip6tables off
service ip6tables stop
iptables -F

#disable selinux
vi /etc/sysconfig/selinux
setenforce 0

# see the text
/etc/security/li…</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/install_hadoop_eco_system_single_mode?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>install_hadoop_eco_system_single_mode</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/install_hadoop_eco_system_single_mode?rev=1641225830&amp;do=diff</link>
        <description>Install Hadoop eco-system singlemode

install openjdk debian


sudo apt-get install openjdk-7-jdk ssh rsync

sudo addgroup hadoop
sudo adduser --ingroup hadoop hduser

su - hduser
ssh-keygen -t rsa -P &quot;&quot;
cat $HOME/.ssh/id_rsa.pub &gt;&gt; $HOME/.ssh/authorized_keys</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/job_client?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>job_client</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/job_client?rev=1641225830&amp;do=diff</link>
        <description>Job Client

Client that submits a job to the Cluster</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/job_tracker?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>job_tracker</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/job_tracker?rev=1641225830&amp;do=diff</link>
        <description>Job Tracker

Master of the jobs. Should define the execution plan and the timing of execution across the Task trackers
Similar to Name node</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/konx?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>konx</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/konx?rev=1641225830&amp;do=diff</link>
        <description>Knox

Centralized security for clusters</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/lucene?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>lucene</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/lucene?rev=1641225830&amp;do=diff</link>
        <description>Lucene

Full text</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/mahout?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>mahout</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/mahout?rev=1641225830&amp;do=diff</link>
        <description>mahout

Machine learning - Recommendation engine
Recommendation, Clustering, Classification</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/mapreduce?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>mapreduce</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/mapreduce?rev=1641225830&amp;do=diff</link>
        <description>Map Reduce

MapReduce works on all data inside a cluster.

Job client. Master: Job tracker, Task tracker

Map

Used to Map the data that we want to work with (equivalent to a SQL “WHERE” clause)

Reduce

Used to aggregate the selected data from Map</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/name_node?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>name_node</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/name_node?rev=1641225830&amp;do=diff</link>
        <description>Name node

see also Secondary name node

	*  :!: MOST IMPORTANT FOR CLUSTER! 
	*  in-memory location of every Data block in cluster / rack
	*  Controls DataNodes
	*  FS Ops 
	*  Block Mapping
	*  Sends coordinates to clients: where to get the Data blocks and from which Data node
	*  Name node retrieves every 3 seconds</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/oozie?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>oozie</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/oozie?rev=1641225830&amp;do=diff</link>
        <description>Oozie

Workflow tool</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/pig?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>pig</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/pig?rev=1641225830&amp;do=diff</link>
        <description>Pig

High level data flow.

	*  Pig latin - Scripting language
	*  Pig Runtime - converts Pig latin to MapReduce and execute it</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/replication_factor?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>replication_factor</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/replication_factor?rev=1641225830&amp;do=diff</link>
        <description>Replication Factor

In HDFS the default RF is 3. That means each Data block will be replicated 3 times on different Data nodes</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/secondary_name_node?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>secondary_name_node</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/secondary_name_node?rev=1641225830&amp;do=diff</link>
        <description>Secondary name node

See Name node.

NOT HIGH AVAILABILITY :!:

Uses

	*  “System restore” for Name node
	*  Responsible for minimizing data loss if the Name node fails

How it works

	*  Structure
		*  fsImage
		*  Edit Logs

	*  Every hour copy Edit log and fsImage from Name node
	*  If Name node failed - merges</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/spark?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>spark</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/spark?rev=1641225830&amp;do=diff</link>
        <description>Spark

In memory data - should do it faster</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/sqoop?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>sqoop</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/sqoop?rev=1641225830&amp;do=diff</link>
        <description>sqoop

Integration with relational system</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/task_tracker?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>task_tracker</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/task_tracker?rev=1641225830&amp;do=diff</link>
        <description>Task tracker

	*  Responsible for executing map &amp; reduce
	*  Reports its progress to Job tracker
	*</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/thrift?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>thrift</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/thrift?rev=1641225830&amp;do=diff</link>
        <description>Thrift

Language neutralization form</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/yarn?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>yarn</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/yarn?rev=1641225830&amp;do=diff</link>
        <description>YARN

aka MapReduce V2</description>
    </item>
    <item rdf:about="https://wiki.korotkin.co.il/learn/bigdata/zookeeper?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>zookeeper</title>
        <link>https://wiki.korotkin.co.il/learn/bigdata/zookeeper?rev=1641225830&amp;do=diff</link>
        <description>zookeeper

Centralized management point that keeps clusters and services in sync</description>
    </item>
</rdf:RDF>
