From 0a53219c52d63edc4f49df807dbededb7280fec9 Mon Sep 17 00:00:00 2001
From: James Hetherington
Date: Tue, 1 Nov 2016 14:04:06 +0000
Subject: [PATCH 1/3] gerun inspired runner for spark on UCL clusters

---
 spark-files/spark run | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 spark-files/spark run

diff --git a/spark-files/spark run b/spark-files/spark run
new file mode 100644
index 00000000..d4706739
--- /dev/null
+++ b/spark-files/spark run
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# A gerun-inspired wrapper for running spark jobs on UCL Clusters
+# Mostly based on a script by Jonathan Dursi
+# from this post: http://www.dursi.ca/spark-in-hpc-clusters/
+
+set -euo pipefail
+
+# Make a per-job output directory (JOB_ID is provided by the scheduler)
+OUTPUT_DIR="$(pwd)/run.${JOB_ID}"
+mkdir -p "$OUTPUT_DIR"
+
+# Get our list of allocated nodes and how many
+nodes=($( sort -u <"$TMPDIR/machines" | sed -e 's/$/.data.legion.ucl.ac.uk/' ))
+nnodes=${#nodes[@]}
+last=$(( nnodes - 1 ))
+
+export SPARK_LOCAL_DIRS="$TMPDIR"
+
+# Start the Spark master on the first allocated node
+ssh "${nodes[0]}" "module load java; cd ${SPARK_HOME}; ./sbin/start-master.sh"
+sparkmaster="spark://${nodes[0]}:7077"
+
+# Start the spark workers on all nodes
+for i in $( seq 0 $last )
+do
+    ssh "${nodes[$i]}" "cd ${SPARK_HOME}; module load java; export SPARK_LOCAL_DIRS=\"$TMPDIR\"; nohup spark-class org.apache.spark.deploy.worker.Worker ${sparkmaster} &> ${OUTPUT_DIR}/nohup-${nodes[$i]}.out" &
+done
+
+# Submit the script to the Spark cluster; remember its exit status so we
+# still tear the cluster down if the job fails.
+rv=0
+spark-submit --master "${sparkmaster}" "$@" || rv=$?
+
+# Stop the Spark master and kill all the Spark processes to clean up
+for i in $( seq 0 $last )
+do
+    ssh "${nodes[$i]}" "module load java; cd ${SPARK_HOME}; ./sbin/stop-slaves.sh" &
+done
+
+ssh "${nodes[0]}" "module load java; cd ${SPARK_HOME}; ./sbin/stop-master.sh" &
+wait
+
+# Propagate the spark-submit exit status to the scheduler
+exit "$rv"

From 9a13067e17412d4aac168619bb397840790c01ba Mon Sep 17 00:00:00 2001
From: James Hetherington
Date: Tue, 1 Nov 2016 14:04:32 +0000
Subject: [PATCH 2/3] It put a space in my name!
---
 spark-files/{spark run => sparkrun} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename spark-files/{spark run => sparkrun} (100%)

diff --git a/spark-files/spark run b/spark-files/sparkrun
similarity index 100%
rename from spark-files/spark run
rename to spark-files/sparkrun

From c22a3c2a448502937ee62f352be4485b050be59f Mon Sep 17 00:00:00 2001
From: James Hetherington
Date: Fri, 11 Nov 2016 10:30:25 +0000
Subject: [PATCH 3/3] Create spark-2.0.1-install

---
 spark-2.0.1-install | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 spark-2.0.1-install

diff --git a/spark-2.0.1-install b/spark-2.0.1-install
new file mode 100644
index 00000000..dc8faaf5
--- /dev/null
+++ b/spark-2.0.1-install
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+###############################################
+# Installing spark
+#
+# by James Hetherington, 2016
+#
+
+# Fail on the first error; set early so download/extract failures abort too.
+set -e
+
+APPNAME=${APPNAME:-spark}
+VERSION=${VERSION:-2.0.1}
+# We do not need Hadoop. But the built jars "binaries" contain a Hadoop version.
+HADOOPVERSION=${HADOOPVERSION:-2.7}
+INSTALL_ZONE=/home/ucgajhe/software/
+# INSTALL will be to /shared/ucl/apps/
+INSTALL_PREFIX=${INSTALL_PREFIX:-$INSTALL_ZONE/$APPNAME/$VERSION}
+# TEMPDIR will be on /dev/shm
+TEMP_ZONE=/home/ucgajhe/software/shm
+
+SHA256=${SHA256:-3d017807650f41377118a736e2f2298cd0146a593e7243a28c2ed72a88b9a043}
+SRC_ARCHIVE=${SRC_ARCHIVE:-http://mirror.catn.com/pub/apache/$APPNAME/$APPNAME-$VERSION/$APPNAME-$VERSION-bin-hadoop${HADOOPVERSION}.tgz}
+
+export PATH=$INSTALL_PREFIX/bin:$PATH
+
+echo "Install prefix : $INSTALL_PREFIX"
+
+mkdir -p "$TEMP_ZONE/$APPNAME"
+temp_dir=$(mktemp -d -p "$TEMP_ZONE/$APPNAME")
+# Remove the download area however the script exits.
+trap 'rm -rf "$temp_dir"' EXIT
+
+cd "$temp_dir"
+
+wget -O "${APPNAME}-${VERSION}.tgz" "$SRC_ARCHIVE"
+
+CHECKSUM=$(sha256sum "${APPNAME}-${VERSION}.tgz" | awk '{print $1}')
+
+if [ "$SHA256" == "$CHECKSUM" ]
+then
+    mkdir -p "$INSTALL_PREFIX"
+    cd "$INSTALL_PREFIX"
+    tar -xzvf "$temp_dir/${APPNAME}-${VERSION}.tgz"
+    mv "spark-${VERSION}-bin-hadoop${HADOOPVERSION}"/* .
+    rmdir "spark-${VERSION}-bin-hadoop${HADOOPVERSION}"
+    # Install the gerun-style job launcher alongside the Spark binaries.
+    wget -O bin/sparkrun https://raw.githubusercontent.com/jamespjh/rcps-buildscripts/master/spark-files/sparkrun
+    chmod a+x bin/sparkrun
+else
+    echo "Hash mismatch." >&2
+    echo "Expected: $SHA256" >&2
+    echo "Got: $CHECKSUM" >&2
+    # A bad download must be a hard failure, not a silent success.
+    exit 1
+fi