-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathHeightsExampleSparkSql.py
More file actions
33 lines (26 loc) · 985 Bytes
/
HeightsExampleSparkSql.py
File metadata and controls
33 lines (26 loc) · 985 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os
from pyspark import SparkContext, SparkConf
from pyspark import RDD
from pyspark.sql import SparkSession
from pyspark.sql import DataFrame
from pyspark.sql.functions import col
from time import sleep
def main():
    """Join the heights and names sample CSVs with Spark SQL and print the result.

    Reads ``./sample_datasets/heights.csv`` and ``./sample_datasets/names.csv``
    (both with a header row), registers them as the temporary views
    ``heights`` and ``names``, joins them on ``id`` and prints the joined rows
    together with a derived ``HeightInInch`` column.

    The Spark session obtained from ``get_spark_session()`` is stopped before
    returning, even when reading or querying fails, so the local Spark
    context does not outlive the example.
    """
    print("Started aggregation example")
    spark = get_spark_session()
    try:
        # NOTE(review): no schema is supplied or inferred, so the CSV columns
        # are presumably loaded as strings and the division below relies on
        # Spark SQL's implicit numeric cast -- confirm against the Spark docs.
        df_heights: DataFrame = spark.read.option("header", "true")\
            .csv("./sample_datasets/heights.csv")
        df_heights.createOrReplaceTempView("heights")

        df_names: DataFrame = spark.read.option("header", "true")\
            .csv("./sample_datasets/names.csv")
        df_names.createOrReplaceTempView("names")

        # Dividing by 2.54 presumably converts centimetres to inches; verify
        # the unit of the Height column against the dataset.
        df_joined: DataFrame = spark.sql("SELECT *, Height / 2.54 as HeightInInch FROM heights join names on heights.id = names.id")
        df_joined.show()
        print("Finished aggregation example")
    finally:
        # Release the Spark context (and its JVM resources) on success and
        # on failure alike.
        spark.stop()
def get_spark_session():
    """Return the SparkSession used by this example.

    The session is configured with master ``local`` and application name
    ``SparkMapReduceExample``; ``getOrCreate()`` hands back an already
    existing session when there is one instead of building a new one.
    """
    session_builder = SparkSession.builder
    session_builder = session_builder.master("local")
    session_builder = session_builder.appName('SparkMapReduceExample')
    return session_builder.getOrCreate()
# Run the example only when this file is executed as a script, not on import.
if '__main__' == __name__:
    main()