-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpig-spark
executable file
·86 lines (64 loc) · 2.38 KB
/
pig-spark
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Getting started
#
# Compile spark with hadoop 2.2.0: run `SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true ./sbt/sbt clean assembly` to generate spark.jar
# Compile pig with -Dhadoopversion=23 flag
# Configure following environment variables to run it on YARN cluster
# Follow this guide to enable running spork:
## http://docs.sigmoidanalytics.com/index.php/Setting_up_spork_with_spark_0.8.1
# Fail fast when the Hadoop environment is not configured: both
# HADOOP_CONF_DIR and HADOOP_HOME must be set to a non-empty value.
# HADOOP_CONF_DIR is checked first, so it is the one reported when both
# are missing.
for required_var in HADOOP_CONF_DIR HADOOP_HOME; do
  # ${!required_var} is bash indirect expansion: the value of the variable
  # whose name is stored in required_var.
  if [ -z "${!required_var}" ]; then
    # Diagnostics belong on stderr so they never mix with regular output.
    echo "You need to set $required_var" >&2
    exit 1
  fi
done
unset required_var
# Not necessary after SPARK-1053
#export SPARK_YARN_APP_JAR=build/pig-withouthadoop.jar
# To debug OOMs
#export SPARK_JAVA_OPTS=" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap.dump"
#export SPARK_JAVA_OPTS+=" -Xdebug -Xrunjdwp:transport=dt_socket,address=12345,server=y,suspend=y"
# Settings to work with YARN, spark jar compiled with hadoop 2
# Spark master URL. A non-empty SPARK_MASTER already present in the
# environment is honoured; otherwise the spark:// master on 127.0.0.1:7077
# is used. To run against another master, set the variable before invoking
# this script, e.g.
#export SPARK_MASTER=spark://-------:7077
#export MESOS_NATIVE_LIBRARY= <libmesos.so>
export SPARK_MASTER="${SPARK_MASTER:-spark://127.0.0.1:7077}"
####
# Resolve the real location of this script: $0 may be a symlink, or a chain
# of symlinks. On exit:
#   bin    - absolute directory containing the resolved script
#   script - file name of the resolved script
#   this   - "$bin/$script"
this="$0"
while [ -h "$this" ]; do
  # readlink prints the link target itself, so there is no need to parse the
  # human-oriented output of `ls -l`.
  link=$(readlink "$this")
  case "$link" in
    /*)
      # Absolute target: use it as-is.
      this="$link"
      ;;
    *)
      # Relative target, with or without a slash in it: a symlink target is
      # relative to the directory holding the link, not to the caller's
      # current directory.
      this="$(dirname "$this")/$link"
      ;;
  esac
done
# Convert a possibly relative path into an absolute one.
bin=$(dirname "$this")
script=$(basename "$this")
# CDPATH is unset inside the subshell so that cd resolves "$bin" only against
# the current directory. A failing cd aborts instead of silently yielding the
# caller's working directory.
bin=$(unset CDPATH; cd "$bin" && pwd) || {
  echo "Cannot resolve the directory of $this" >&2
  exit 1
}
this="$bin/$script"
# the root of the Pig installation: the directory part of $this, the path
# computed for this script earlier in the file.
PIG_HOME=$(dirname "$this")
export PIG_HOME
# jars to ship, pig-withouthadoop.jar to workaround Classloader issue
# Pig settings
# PIG_HOME can only be empty here if dirname produced no output; report that
# on stderr and stop.
if [ -z "$PIG_HOME" ]; then
  echo "You need to set PIG_HOME" >&2
  exit 1
fi
# Classpath for Pig: entries are joined with ':' in the order listed. The
# '*' entry is kept literal (it is quoted, so the shell does not expand it).
PIG_CLASSPATH="$PIG_HOME/build/ivy/lib/Pig/netty-3.6.6.Final.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$PIG_HOME/pig-withouthadoop.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$PIG_HOME/lib/spark/mesos-0.9.0.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$PIG_HOME/build/ivy/lib/Pig/*"
PIG_CLASSPATH="$PIG_CLASSPATH:$HADOOP_CONF_DIR"
PIG_CLASSPATH="$PIG_CLASSPATH:$PIG_HOME/conf"
export PIG_CLASSPATH

# Comma-separated list of jars exported as SPARK_JARS.
SPARK_JARS="$PIG_HOME/build/pig-0.12.0-SNAPSHOT-withdependencies.jar"
SPARK_JARS="$SPARK_JARS,$PIG_HOME/build/ivy/lib/Pig/twitter4j-core-3.0.3.jar"
SPARK_JARS="$SPARK_JARS,$PIG_HOME/build/ivy/lib/Pig/twitter4j-stream-3.0.3.jar"
export SPARK_JARS

# Location of the Pig jar, relative to the Pig installation root.
SPARK_PIG_JAR="$PIG_HOME/pig.jar"
export SPARK_PIG_JAR
# SPARK_HOME must be set to a non-empty value; otherwise report the problem
# on stderr and stop with exit status 1.
if [ -z "$SPARK_HOME" ]; then
  echo "You need to set SPARK_HOME" >&2
  exit 1
fi
# Cluster settings
# Fixed values exported for the Spark processes: one worker instance, one
# core per worker, and 512m for both SPARK_WORKER_MEMORY and SPARK_MEM.
SPARK_WORKER_INSTANCES=1
SPARK_WORKER_CORES=1
SPARK_WORKER_MEMORY=512m
SPARK_MEM=512m
export SPARK_WORKER_INSTANCES SPARK_WORKER_CORES SPARK_WORKER_MEMORY SPARK_MEM
# Launch Pig with `-x spark`, forwarding every command-line argument
# unchanged. The launcher is addressed through PIG_HOME so the script also
# works when it is invoked from a directory other than the Pig installation
# root. As the last command, its exit status becomes the script's.
"$PIG_HOME/bin/pig" -x spark "$@"