<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="FeedCreator 1.8" -->
<?xml-stylesheet href="https://wiki.korotkin.co.il/lib/exe/css.php?s=feed" type="text/css"?>
<rdf:RDF
    xmlns="http://purl.org/rss/1.0/"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
    xmlns:dc="http://purl.org/dc/elements/1.1/">
    <!-- Channel metadata for the "kb:howto" feed. Each rdf:li inside <items>
         carries the same URI as the rdf:about attribute of one <item> element
         in this document. -->
    <channel rdf:about="https://wiki.korotkin.co.il/feed.php">
        <title>KB - kb:howto</title>
        <description></description>
        <link>https://wiki.korotkin.co.il/</link>
        <image rdf:resource="https://wiki.korotkin.co.il/_media/wiki/logo.png" />
        <dc:date>2026-04-10T17:06:54+00:00</dc:date>
        <items>
            <rdf:Seq>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/howto/create_and_save_simple_spark_ml_pipline?rev=1641225830&amp;do=diff"/>
                <rdf:li rdf:resource="https://wiki.korotkin.co.il/kb/howto/create_simple_dataframe_from_array?rev=1641225830&amp;do=diff"/>
            </rdf:Seq>
        </items>
    </channel>
    <!-- Feed logo. The rdf:about URI is the same one referenced by the
         channel's image rdf:resource attribute. -->
    <image rdf:about="https://wiki.korotkin.co.il/_media/wiki/logo.png">
        <title>KB</title>
        <link>https://wiki.korotkin.co.il/</link>
        <url>https://wiki.korotkin.co.il/_media/wiki/logo.png</url>
    </image>
    <!-- Feed entry for the page kb/howto/create_and_save_simple_spark_ml_pipline.
         The description is a text excerpt that ends with an ellipsis; its line
         breaks and leading whitespace are part of the text, so do not re-indent it.
         NOTE(review): "pipline" is presumably a typo for "pipeline", but it also
         appears in the page id and URLs; correct it at the source page rather
         than in this generated feed. -->
    <item rdf:about="https://wiki.korotkin.co.il/kb/howto/create_and_save_simple_spark_ml_pipline?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>create_and_save_simple_spark_ml_pipline</title>
        <link>https://wiki.korotkin.co.il/kb/howto/create_and_save_simple_spark_ml_pipline?rev=1641225830&amp;do=diff</link>
        <description>Create and save simple spark ml pipline



# Import standard PySpark Transformers and packages
from pyspark.ml.feature import VectorAssembler, StandardScaler, OneHotEncoder, StringIndexer
from pyspark.ml import Pipeline, PipelineModel
from pyspark.sql import Row

# Create a test data frame
l = [(&#039;Alice&#039;, 1), (&#039;Bob&#039;, 2)]
rdd = sc.parallelize(l)
Person = Row(&#039;name&#039;, &#039;age&#039;)
person = rdd.map(lambda r: Person(*r))
df2 = spark.createDataFrame(person)
df2.collect()

# Build a very simple pipeline using…</description>
    </item>
    <!-- Feed entry for the page kb/howto/create_simple_dataframe_from_array.
         The description holds the page text verbatim, including line breaks,
         so its content must stay unindented. -->
    <item rdf:about="https://wiki.korotkin.co.il/kb/howto/create_simple_dataframe_from_array?rev=1641225830&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-03T16:03:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>create_simple_dataframe_from_array</title>
        <link>https://wiki.korotkin.co.il/kb/howto/create_simple_dataframe_from_array?rev=1641225830&amp;do=diff</link>
        <description>Create simple dataframe from array


pr = [[&#039;Alice&#039;], [&#039;Bob&#039;]]
pdf = sc.parallelize(pr).toDF([&#039;name&#039;])
pdf.show()</description>
    </item>
</rdf:RDF>
