-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathHeightsExample.py
More file actions
39 lines (31 loc) · 1.03 KB
/
HeightsExample.py
File metadata and controls
39 lines (31 loc) · 1.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from pyspark import SparkContext, SparkConf
from pyspark import RDD
from pyspark.sql.types import DoubleType
from pyspark.sql import SparkSession
from pyspark.sql import DataFrame
from pyspark.sql.functions import col
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType
# Column layout applied when loading the heights CSV: an integer id, a
# floating-point Height, and an integer-coded Gender.
HEIGHTS_SCHEMA = StructType(
    fields=[
        StructField(name="id", dataType=IntegerType()),
        StructField(name="Height", dataType=DoubleType()),
        StructField(name="Gender", dataType=IntegerType()),
    ]
)
def main():
    """Run the heights aggregation example.

    Reads ``./sample_datasets/heights.csv`` (with a header row) using
    ``HEIGHTS_SCHEMA``, keeps the rows whose ``Height`` is greater than 170,
    and prints the average ``Height`` per ``Gender`` group.

    The Spark session obtained from ``get_spark_session`` is stopped before
    returning, including when reading or aggregating raises.
    """
    print("Started aggregation example")
    spark = get_spark_session()
    try:
        df: DataFrame = (
            spark.read
            .schema(HEIGHTS_SCHEMA)
            .option("header", "true")
            .csv("./sample_datasets/heights.csv")
        )
        # Reference the column exactly as declared in the schema so the
        # filter does not depend on Spark's case-insensitive name resolution.
        df = df.filter(col("Height") > 170)
        df.groupBy("Gender").avg("Height").show()
        print("Finished aggregation example")
    finally:
        # Release the session's resources even if the job above failed.
        spark.stop()
def get_spark_session():
    """Return a SparkSession for this example, creating it if necessary.

    The session is configured with the ``local`` master and the application
    name ``SparkMapReduceExample``.
    """
    builder = SparkSession.builder
    builder = builder.master("local")
    builder = builder.appName('SparkMapReduceExample')
    return builder.getOrCreate()
# Script entry point: run the example only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()