Skip to content

Commit da69c1f

Browse files
authored
merging latest changes from master
2 parents 013278b + fd386b0 commit da69c1f

File tree

132 files changed

+3429
-352
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

132 files changed

+3429
-352
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
*.class
1212
*.dll
1313
*.exe
14+
*.pyc
1415

1516
# Packages #
1617
############
@@ -40,6 +41,10 @@ build/dependencies/
4041
*.log
4142
lib/
4243

44+
# Local databases used for Dataset/frames #
45+
###########################################
46+
scala/metastore_db/
47+
4348
# Generated Files #
4449
############
4550
SparkCLRCodeCoverage.xml

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Refer to the [docs folder](docs) for design overview and other info on Mobius
112112
|Build & run unit tests |[Build in Windows](notes/windows-instructions.md#building-mobius) |[Build in Linux](notes/linux-instructions.md#building-mobius-in-linux) |
113113
|Run samples (functional tests) in local mode |[Samples in Windows](notes/windows-instructions.md#running-samples) |[Samples in Linux](notes/linux-instructions.md#running-mobius-samples-in-linux) |
114114
|Run examples in local mode |[Examples in Windows](/notes/running-mobius-app.md#running-mobius-examples-in-local-mode) |[Examples in Linux](notes/linux-instructions.md#running-mobius-examples-in-linux) |
115-
|Run Mobius app |<ul><li>[Standalone cluster](notes/running-mobius-app.md#standalone-cluster)</li><li>[YARN cluster](notes/running-mobius-app.md#yarn-cluster)</li></ul> |<ul><li>[Linux cluster](notes/linux-instructions.md#running-mobius-applications-in-linux)</li><li>[Azure HDInsight Spark Cluster](/notes/linux-instructions.md#mobius-in-azure-hdinsight-spark-cluster)</li><li>[AWS EMR Spark Cluster](/notes/linux-instructions.md#mobius-in-amazon-web-services-emr-spark-cluster)</li> |
115+
|Run Mobius app |<ul><li>[Standalone cluster](notes/running-mobius-app.md#standalone-cluster)</li><li>[YARN cluster](notes/running-mobius-app.md#yarn-cluster)</li></ul> |<ul><li>[Linux cluster](notes/linux-instructions.md#running-mobius-applications-in-linux)</li><li>[Azure HDInsight Spark Cluster](/notes/mobius-in-hdinsight.md)</li><li>[AWS EMR Spark Cluster](/notes/linux-instructions.md#mobius-in-amazon-web-services-emr-spark-cluster)</li></ul> |
116116
|Run Mobius Shell |<ul><li>[Local](notes/mobius-shell.md#run-shell)</li><li>[YARN](notes/mobius-shell.md#run-shell)</li></ul> | Not supported yet |
117117

118118
### Useful Links
@@ -122,7 +122,7 @@ Refer to the [docs folder](docs) for design overview and other info on Mobius
122122

123123
## Supported Spark Versions
124124

125-
Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsoft/Mobius/tree/branch-1.4), [1.5.2](https://github.com/Microsoft/Mobius/tree/branch-1.5) and [1.6.*](https://github.com/Microsoft/Mobius/tree/branch-1.6).
125+
Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsoft/Mobius/tree/branch-1.4), [1.5.2](https://github.com/Microsoft/Mobius/tree/branch-1.5), [1.6.*](https://github.com/Microsoft/Mobius/tree/branch-1.6) and [2.0](https://github.com/Microsoft/Mobius/tree/branch-2.0).
126126

127127
## Releases
128128

build/Build.cmd

+5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
@setlocal
22
@echo OFF
33

4+
rem
5+
rem Copyright (c) Microsoft. All rights reserved.
6+
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
7+
rem
8+
49
if "%1" == "csharp" set buildCSharp=true
510

611
SET CMDHOME=%~dp0

build/build.sh

+48-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,49 @@
11
#!/bin/bash
22

3+
#
4+
# Copyright (c) Microsoft. All rights reserved.
5+
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
6+
#
7+
38
export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
49

10+
[ ! -d "$FWDIR/dependencies" ] && mkdir "$FWDIR/dependencies"
11+
12+
echo "Download Mobius external dependencies"
13+
pushd "$FWDIR/dependencies"
14+
15+
download_dependency() {
16+
LINK=$1
17+
JAR=$2
18+
19+
if [ ! -e $JAR ];
20+
then
21+
echo "Downloading $JAR"
22+
wget -q $LINK -O $JAR
23+
24+
if [ ! -e $JAR ];
25+
then
26+
echo "Cannot download external dependency $JAR from $LINK"
27+
popd
28+
exit 1
29+
fi
30+
fi
31+
}
32+
33+
SPARK_CSV_LINK="http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.4.0/spark-csv_2.10-1.4.0.jar"
34+
SPARK_CSV_JAR="spark-csv_2.10-1.4.0.jar"
35+
download_dependency $SPARK_CSV_LINK $SPARK_CSV_JAR
36+
37+
COMMONS_CSV_LINK="http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.4/commons-csv-1.4.jar"
38+
COMMONS_CSV_JAR="commons-csv-1.4.jar"
39+
download_dependency $COMMONS_CSV_LINK $COMMONS_CSV_JAR
40+
41+
SPARK_STREAMING_KAFKA_LINK="http://search.maven.org/remotecontent?filepath=org/apache/spark/spark-streaming-kafka-0-8-assembly_2.11/2.0.0/spark-streaming-kafka-0-8-assembly_2.11-2.0.0.jar"
42+
SPARK_STREAMING_KAFKA_JAR="spark-streaming-kafka-0-8-assembly_2.11-2.0.0.jar"
43+
download_dependency $SPARK_STREAMING_KAFKA_LINK $SPARK_STREAMING_KAFKA_JAR
44+
45+
popd
46+
547
export SPARKCLR_HOME="$FWDIR/runtime"
648
echo "SPARKCLR_HOME=$SPARKCLR_HOME"
749

@@ -17,6 +59,11 @@ fi
1759
[ ! -d "$SPARKCLR_HOME/lib" ] && mkdir "$SPARKCLR_HOME/lib"
1860
[ ! -d "$SPARKCLR_HOME/samples" ] && mkdir "$SPARKCLR_HOME/samples"
1961
[ ! -d "$SPARKCLR_HOME/scripts" ] && mkdir "$SPARKCLR_HOME/scripts"
62+
[ ! -d "$SPARKCLR_HOME/dependencies" ] && mkdir "$SPARKCLR_HOME/dependencies"
63+
64+
echo "Assemble Mobius external dependencies"
65+
cp $FWDIR/dependencies/* "$SPARKCLR_HOME/dependencies/"
66+
[ $? -ne 0 ] && exit 1
2067

2168
echo "Assemble Mobius Scala components"
2269
pushd "$FWDIR/../scala"
@@ -31,7 +78,7 @@ mvn clean -q
3178
# build the package
3279
mvn package -Puber-jar -q
3380

34-
if [ $? -ne 0 ]
81+
if [ $? -ne 0 ];
3582
then
3683
echo "Build Mobius Scala components failed, stop building."
3784
popd

build/copyjar.ps1

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
#
2+
# Copyright (c) Microsoft. All rights reserved.
3+
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
4+
#
5+
16
function Get-ScriptDirectory
27
{
38
$Invocation = (Get-Variable MyInvocation -Scope 1).Value;

build/localmode/RunSamples.cmd

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
@echo OFF
2+
3+
rem
4+
rem Copyright (c) Microsoft. All rights reserved.
5+
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
6+
rem
7+
28
setlocal enabledelayedexpansion
39

410
SET CMDHOME=%~dp0
@@ -62,8 +68,8 @@ set SPARKCLR_HOME=%CMDHOME%\..\runtime
6268

6369
@rem spark-csv package and its dependency are required for DataFrame operations in Mobius
6470
set SPARKCLR_EXT_PATH=%SPARKCLR_HOME%\dependencies
65-
set SPARKCSV_JAR1PATH=%SPARKCLR_EXT_PATH%\spark-csv_2.10-1.3.0.jar
66-
set SPARKCSV_JAR2PATH=%SPARKCLR_EXT_PATH%\commons-csv-1.1.jar
71+
set SPARKCSV_JAR1PATH=%SPARKCLR_EXT_PATH%\spark-csv_2.10-1.4.0.jar
72+
set SPARKCSV_JAR2PATH=%SPARKCLR_EXT_PATH%\commons-csv-1.4.jar
6773
set SPARKCLR_EXT_JARS=%SPARKCSV_JAR1PATH%,%SPARKCSV_JAR2PATH%
6874

6975
@rem RunSamples.cmd is in local mode, should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir.

build/localmode/downloadtools.ps1

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
#
2+
# Copyright (c) Microsoft. All rights reserved.
3+
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
4+
#
5+
16
#
27
# Input -
38
# (1) "stage" parameter, accepts either "build" or "run"
@@ -342,14 +347,14 @@ function Download-ExternalDependencies
342347

343348
$readMeStream.WriteLine("------------ Dependencies for CSV parsing in Mobius DataFrame API -----------------------------")
344349
# Downloading spark-csv package and its dependency. These packages are required for DataFrame operations in Mobius
345-
$url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.3.0/spark-csv_2.10-1.3.0.jar"
346-
$output="$scriptDir\..\dependencies\spark-csv_2.10-1.3.0.jar"
350+
$url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.4.0/spark-csv_2.10-1.4.0.jar"
351+
$output="$scriptDir\..\dependencies\spark-csv_2.10-1.4.0.jar"
347352
Download-File $url $output
348353
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
349354
$readMeStream.WriteLine("$url")
350355

351-
$url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.1/commons-csv-1.1.jar"
352-
$output="$scriptDir\..\dependencies\commons-csv-1.1.jar"
356+
$url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.4/commons-csv-1.4.jar"
357+
$output="$scriptDir\..\dependencies\commons-csv-1.4.jar"
353358
Download-File $url $output
354359
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
355360
$readMeStream.WriteLine("$url")

build/localmode/dumpsoftware.ps1

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
#
2+
# Copyright (c) Microsoft. All rights reserved.
3+
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
4+
#
5+
16
$x64items = @(Get-ChildItem "HKLM:SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall")
27
$x64items + @(Get-ChildItem "HKLM:SOFTWARE\wow6432node\Microsoft\Windows\CurrentVersion\Uninstall") `
38
| ForEach-object { Get-ItemProperty Microsoft.PowerShell.Core\Registry::$_ } `

build/localmode/nugetpack.ps1

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
#
2+
# Copyright (c) Microsoft. All rights reserved.
3+
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
4+
#
5+
16
$root = (split-path -parent $MyInvocation.MyCommand.Definition) + '\..\..'
27

38
# expected tagname: v{version-string}. E.g., "v1.5.2-snapshot-2", "v1.5.2-prerelease-1"

build/localmode/patchpom.ps1

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
#
2+
# Copyright (c) Microsoft. All rights reserved.
3+
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
4+
#
5+
16
#
27
# Input -
38
# "targetPom" parameter, target Pom.xml file

build/localmode/precheck.cmd

+5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
@echo OFF
22

3+
rem
4+
rem Copyright (c) Microsoft. All rights reserved.
5+
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
6+
rem
7+
38
set precheck=ok
49

510
if not exist "%JAVA_HOME%\bin\java.exe" (

build/localmode/run-samples.sh

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
#!/bin/bash
22

3+
#
4+
# Copyright (c) Microsoft. All rights reserved.
5+
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
6+
#
7+
38
export verbose=
49

510
for param in "$@"
@@ -68,9 +73,9 @@ fi
6873

6974
export SPARKCLR_HOME="$FWDIR/../runtime"
7075
# spark-csv package and its dependency are required for DataFrame operations in Mobius
71-
export SPARKCLR_EXT_PATH="$SPARKCLR_HOME\dependencies"
72-
export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH\spark-csv_2.10-1.3.0.jar"
73-
export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH\commons-csv-1.1.jar"
76+
export SPARKCLR_EXT_PATH="$SPARKCLR_HOME/dependencies"
77+
export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH/spark-csv_2.10-1.4.0.jar"
78+
export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH/commons-csv-1.4.jar"
7479
export SPARKCLR_EXT_JARS="$SPARKCSV_JAR1PATH,$SPARKCSV_JAR2PATH"
7580

7681
# run-samples.sh is in local mode, should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir.

build/localmode/zipdir.ps1

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
#
12
# Copyright (c) Microsoft. All rights reserved.
23
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
34
#
5+
46
# This script takes in "dir" and "target" parameters, zips all files under dir to the target file
5-
#
7+
68

79
Param([string]$dir, [string]$target)
810

cpp/Build.cmd

+5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
@setlocal
22
@ECHO off
33

4+
rem
5+
rem Copyright (c) Microsoft. All rights reserved.
6+
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
7+
rem
8+
49
SET CMDHOME=%~dp0
510
@REM Remove trailing backslash \
611
set CMDHOME=%CMDHOME:~0,-1%

cpp/Clean.cmd

+6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
@ECHO OFF
2+
3+
rem
4+
rem Copyright (c) Microsoft. All rights reserved.
5+
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
6+
rem
7+
28
FOR /D /R . %%G IN (bin) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G")
39
FOR /D /R . %%G IN (obj) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G")
410
FOR /D /R . %%G IN (x64) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G")

csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj

+10
Original file line numberDiff line numberDiff line change
@@ -102,20 +102,25 @@
102102
<Compile Include="Network\SockDataToken.cs" />
103103
<Compile Include="Network\SocketFactory.cs" />
104104
<Compile Include="Properties\AssemblyInfo.cs" />
105+
<Compile Include="Proxy\ICatalogProxy.cs" />
105106
<Compile Include="Proxy\IDataFrameNaFunctionsProxy.cs" />
106107
<Compile Include="Proxy\IDataFrameProxy.cs" />
107108
<Compile Include="Proxy\IDataFrameReaderProxy.cs" />
108109
<Compile Include="Proxy\IDataFrameWriterProxy.cs" />
110+
<Compile Include="Proxy\IDatasetProxy.cs" />
109111
<Compile Include="Proxy\IDStreamProxy.cs" />
110112
<Compile Include="Proxy\IHadoopConfigurationProxy.cs" />
113+
<Compile Include="Proxy\Ipc\CatalogIpcProxy.cs" />
111114
<Compile Include="Proxy\Ipc\DataFrameIpcProxy.cs" />
112115
<Compile Include="Proxy\Ipc\DataFrameNaFunctionsIpcProxy.cs" />
113116
<Compile Include="Proxy\Ipc\DataFrameReaderIpcProxy.cs" />
114117
<Compile Include="Proxy\Ipc\DataFrameWriterIpcProxy.cs" />
118+
<Compile Include="Proxy\Ipc\DatasetIpcProxy.cs" />
115119
<Compile Include="Proxy\Ipc\DStreamIpcProxy.cs" />
116120
<Compile Include="Proxy\Ipc\HadoopConfigurationIpcProxy.cs" />
117121
<Compile Include="Proxy\Ipc\RDDIpcProxy.cs" />
118122
<Compile Include="Proxy\Ipc\SparkCLRIpcProxy.cs" />
123+
<Compile Include="Proxy\Ipc\SparkSessionIpcProxy.cs" />
119124
<Compile Include="Proxy\Ipc\SqlContextIpcProxy.cs" />
120125
<Compile Include="Proxy\Ipc\StatusTrackerIpcProxy.cs" />
121126
<Compile Include="Proxy\Ipc\StreamingContextIpcProxy.cs" />
@@ -125,6 +130,7 @@
125130
<Compile Include="Proxy\ISparkConfProxy.cs" />
126131
<Compile Include="Proxy\ISparkContextProxy.cs" />
127132
<Compile Include="Proxy\Ipc\SparkConfIpcProxy.cs" />
133+
<Compile Include="Proxy\ISparkSessionProxy.cs" />
128134
<Compile Include="Proxy\ISqlContextProxy.cs" />
129135
<Compile Include="Proxy\IStatusTrackerProxy.cs" />
130136
<Compile Include="Proxy\IStreamingContextProxy.cs" />
@@ -134,17 +140,21 @@
134140
<Compile Include="Services\ILoggerService.cs" />
135141
<Compile Include="Services\Log4NetLoggerService.cs" />
136142
<Compile Include="Services\LoggerServiceFactory.cs" />
143+
<Compile Include="Sql\Builder.cs" />
144+
<Compile Include="Sql\Catalog.cs" />
137145
<Compile Include="Sql\Column.cs" />
138146
<Compile Include="Sql\DataFrame.cs" />
139147
<Compile Include="Sql\DataFrameNaFunctions.cs" />
140148
<Compile Include="Sql\DataFrameReader.cs" />
141149
<Compile Include="Sql\DataFrameWriter.cs" />
150+
<Compile Include="Sql\Dataset.cs" />
142151
<Compile Include="Sql\HiveContext.cs" />
143152
<Compile Include="Sql\PythonSerDe.cs" />
144153
<Compile Include="Sql\RowConstructor.cs" />
145154
<Compile Include="Sql\Row.cs" />
146155
<Compile Include="Sql\Functions.cs" />
147156
<Compile Include="Sql\SaveMode.cs" />
157+
<Compile Include="Sql\SparkSession.cs" />
148158
<Compile Include="Sql\SqlContext.cs" />
149159
<Compile Include="Sql\Types.cs" />
150160
<Compile Include="Sql\UserDefinedFunction.cs" />

csharp/Adapter/Microsoft.Spark.CSharp/Configuration/ConfigurationService.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ internal ConfigurationService()
6565
configuration = new SparkCLRConfiguration(appConfig);
6666
runMode = RunMode.CLUSTER;
6767
}
68-
else if (sparkMaster.Equals("yarn-client", StringComparison.OrdinalIgnoreCase) || sparkMaster.Equals("yarn-cluster", StringComparison.OrdinalIgnoreCase))
68+
else if (sparkMaster.Equals("yarn-cluster", StringComparison.OrdinalIgnoreCase) ||
69+
sparkMaster.Equals("yarn-client", StringComparison.OrdinalIgnoreCase) ||
70+
sparkMaster.Equals("yarn", StringComparison.OrdinalIgnoreCase)) //supported in Spark 2.0
6971
{
7072
configuration = new SparkCLRConfiguration(appConfig);
7173
runMode = RunMode.YARN;

csharp/Adapter/Microsoft.Spark.CSharp/Core/CSharpWorkerFunc.cs

+4-3
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@ internal class CSharpWorkerFunc
2222
public CSharpWorkerFunc(Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> func)
2323
{
2424
this.func = func;
25-
stackTrace = new StackTrace(true).ToString();
25+
stackTrace = new StackTrace(true).ToString().Replace(" at ", " [STACK] ");
2626
}
2727

2828
public CSharpWorkerFunc(Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> func, string innerStackTrace)
29+
: this(func)
2930
{
30-
this.func = func;
31-
stackTrace = new StackTrace(true).ToString() + "\nInner stack trace ...\n" + innerStackTrace;
31+
stackTrace += string.Format(" [STACK] --- Inner stack trace: ---{0}{1}",
32+
Environment.NewLine, innerStackTrace.Replace(" at ", " [STACK] "));
3233
}
3334

3435
public Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> Func

0 commit comments

Comments
 (0)