зеркало из https://github.com/dotnet/spark.git
Update the logic for Microsoft.Spark.Worker path discovery (#134)
This commit is contained in:
Родитель
c3a65a24ee
Коммит
f1c5b86d84
|
@ -22,10 +22,10 @@ Follow the instructions for registration and download the tool to local disk wit
|
||||||
dbgen.exe -vf -s 300
|
dbgen.exe -vf -s 300
|
||||||
```
|
```
|
||||||
*Note*: Since there is no parallelization option for TPC-H dbgen, generating a 300GB dataset could take up to 40 hours to complete.
|
*Note*: Since there is no parallelization option for TPC-H dbgen, generating a 300GB dataset could take up to 40 hours to complete.
|
||||||
|
|
||||||
- After database population generation is completed, there should be 8 tables (customer, lineitem, nation, orders, part, partsupp, region, supplier) created with the .tbl extension.
|
- After database population generation is completed, there should be 8 tables (customer, lineitem, nation, orders, part, partsupp, region, supplier) created with the .tbl extension.
|
||||||
|
|
||||||
4. Convert TPC-H dataset to parquet format.
|
4. Convert TPC-H dataset to parquet format.
|
||||||
- You can use a simple Spark [application](https://github.com/dotnet/spark/blob/master/benchmark/scala/src/main/scala/com/microsoft/tpch/ConvertTpchCsvToParquetApp.scala) to convert the TPC-H dataset to parquet format. You can run the following spark-submit command to submit the application; you can also adjust it according to the format described in [submitting applications](https://spark.apache.org/docs/latest/submitting-applications.html).
|
- You can use a simple Spark [application](https://github.com/dotnet/spark/blob/master/benchmark/scala/src/main/scala/com/microsoft/tpch/ConvertTpchCsvToParquetApp.scala) to convert the TPC-H dataset to parquet format. You can run the following spark-submit command to submit the application; you can also adjust it according to the format described in [submitting applications](https://spark.apache.org/docs/latest/submitting-applications.html).
|
||||||
```
|
```
|
||||||
<spark-submit> --master local[*] --class com.microsoft.tpch.ConvertTpchCsvToParquetApp microsoft-spark-benchmark-<version>.jar <path-to-source-directory-with-TPCH-tables> <path-to-destination-directory-to-save-parquet-file>
|
<spark-submit> --master local[*] --class com.microsoft.tpch.ConvertTpchCsvToParquetApp microsoft-spark-benchmark-<version>.jar <path-to-source-directory-with-TPCH-tables> <path-to-destination-directory-to-save-parquet-file>
|
||||||
|
|
|
@ -10,7 +10,6 @@
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Experimental\Microsoft.Spark.Experimental.csproj" />
|
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Experimental\Microsoft.Spark.Experimental.csproj" />
|
||||||
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
|
|
||||||
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
|
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|
|
@ -9,8 +9,8 @@ CSHARP_DLL=$6
|
||||||
JAR_PATH=$7
|
JAR_PATH=$7
|
||||||
CSHARP_EXECUTABLE=$8
|
CSHARP_EXECUTABLE=$8
|
||||||
DATA_PATH=$9
|
DATA_PATH=$9
|
||||||
NUM_ITERATION=$10
|
NUM_ITERATION=${10}
|
||||||
IS_SQL=$11
|
IS_SQL=${11}
|
||||||
|
|
||||||
for i in {1..22}
|
for i in {1..22}
|
||||||
do
|
do
|
||||||
|
|
|
@ -48,7 +48,7 @@ IFS='-' read -ra BASE_FILENAME <<< "$(basename $SRC_WORKER_PATH_OR_URI .tar.gz)"
|
||||||
VERSION=${BASE_FILENAME[2]}
|
VERSION=${BASE_FILENAME[2]}
|
||||||
|
|
||||||
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
|
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
|
||||||
[[ ${#VERSION[@]} != 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
|
[[ ${#VERSION_CHECK[@]} == 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
|
||||||
|
|
||||||
# Path of the final destination for the worker binaries
|
# Path of the final destination for the worker binaries
|
||||||
# (the one we just downloaded and extracted)
|
# (the one we just downloaded and extracted)
|
||||||
|
|
|
@ -119,86 +119,56 @@ You should see JARs created for the supported Spark versions:
|
||||||
## Building .NET Sample Applications using .NET Core CLI
|
## Building .NET Sample Applications using .NET Core CLI
|
||||||
|
|
||||||
1. Build the Worker
|
1. Build the Worker
|
||||||
```bash
|
```bash
|
||||||
cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/
|
cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/
|
||||||
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
|
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
|
||||||
```
|
|
||||||
<details>
|
|
||||||
<summary>📙 Click to see sample console output</summary>
|
|
||||||
|
|
||||||
```
|
|
||||||
user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
|
|
||||||
|
|
||||||
Welcome to .NET Core!
|
|
||||||
---------------------
|
|
||||||
Learn more about .NET Core: https://aka.ms/dotnet-docs
|
|
||||||
Use 'dotnet --help' to see available commands or visit: https://aka.ms/dotnet-cli-docs
|
|
||||||
|
|
||||||
...
|
|
||||||
output omitted
|
|
||||||
...
|
|
||||||
|
|
||||||
Restore completed in 20.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
|
|
||||||
Installing runtime.linux-x64.Microsoft.NETCore.DotNetAppHost 2.1.9.
|
|
||||||
Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostResolver 2.1.9.
|
|
||||||
Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostPolicy 2.1.9.
|
|
||||||
Installing runtime.linux-x64.Microsoft.NETCore.App 2.1.9.
|
|
||||||
Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.props.
|
|
||||||
Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.targets.
|
|
||||||
Restore completed in 37.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
|
|
||||||
Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
|
|
||||||
Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
|
|
||||||
Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
|
||||||
2. Build the Samples
|
|
||||||
|
|
||||||
**.NET Core 2.1.x**
|
|
||||||
Due to a bug in .NET Core 2.1.x CLI that causes problems with building a dependency project that creates executables, we have to resort to modifying the `.csproj` file. We are working with the .NET team towards resolving this.
|
|
||||||
```
|
```
|
||||||
cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
|
|
||||||
cat Microsoft.Spark.CSharp.Examples.csproj | grep -v "Microsoft.Spark.Worker.csproj" > Microsoft.Spark.CSharp.Examples.Patched.csproj
|
|
||||||
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
|
|
||||||
```
|
|
||||||
|
|
||||||
**.NET Core 3.x**
|
|
||||||
If you are using .NET Core 3.x, you can avoid creating a new patched `.csproj` file and instead compile the project directly:
|
|
||||||
```
|
|
||||||
cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
|
|
||||||
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.csproj
|
|
||||||
```
|
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary>📙 Click to see sample console output</summary>
|
<summary>📙 Click to see sample console output</summary>
|
||||||
|
|
||||||
|
```bash
|
||||||
|
user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
|
||||||
|
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
|
||||||
|
Copyright (C) Microsoft Corporation. All rights reserved.
|
||||||
|
|
||||||
|
Restore completed in 36.03 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
|
||||||
|
Restore completed in 35.94 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
|
||||||
|
Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
|
||||||
|
Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
|
||||||
|
Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
|
||||||
```
|
```
|
||||||
user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
|
|
||||||
Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
|
</details>
|
||||||
|
|
||||||
|
2. Build the Samples
|
||||||
|
```bash
|
||||||
|
cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
|
||||||
|
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
|
||||||
|
```
|
||||||
|
<details>
|
||||||
|
<summary>📙 Click to see sample console output</summary>
|
||||||
|
|
||||||
|
```bash
|
||||||
|
user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
|
||||||
|
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
|
||||||
Copyright (C) Microsoft Corporation. All rights reserved.
|
Copyright (C) Microsoft Corporation. All rights reserved.
|
||||||
|
|
||||||
Restoring packages for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj...
|
Restore completed in 37.11 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
|
||||||
Restore completed in 53 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
|
Restore completed in 281.63 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj.
|
||||||
Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
|
Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
|
||||||
Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
|
Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
|
||||||
Restore completed in 305.72 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj.
|
Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
|
||||||
Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
|
```
|
||||||
Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
|
|
||||||
Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
3. Manually copy Worker binaries into the Samples output location.
|
|
||||||
```
|
|
||||||
cp ~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/* ~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
|
|
||||||
```
|
|
||||||
|
|
||||||
# Run Samples
|
# Run Samples
|
||||||
|
|
||||||
Once you build the samples, you can use `spark-submit` to submit your .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
|
Once you build the samples, you can use `spark-submit` to submit your .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
|
||||||
|
|
||||||
1. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
|
1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
|
||||||
2. Running your app follows the basic structure:
|
2. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
|
||||||
|
3. Running your app follows the basic structure:
|
||||||
```bash
|
```bash
|
||||||
spark-submit \
|
spark-submit \
|
||||||
[--jars <any-jars-your-app-is-dependent-on>] \
|
[--jars <any-jars-your-app-is-dependent-on>] \
|
||||||
|
|
|
@ -42,7 +42,7 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
|
||||||
- Verify you are able to run `spark-shell` from your command-line
|
- Verify you are able to run `spark-shell` from your command-line
|
||||||
<details>
|
<details>
|
||||||
<summary>📙 Click to see sample console output</summary>
|
<summary>📙 Click to see sample console output</summary>
|
||||||
|
|
||||||
```
|
```
|
||||||
Welcome to
|
Welcome to
|
||||||
____ __
|
____ __
|
||||||
|
@ -58,26 +58,22 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
|
||||||
scala> sc
|
scala> sc
|
||||||
res0: org.apache.spark.SparkContext = org.apache.spark.SparkContext@6eaa6b0c
|
res0: org.apache.spark.SparkContext = org.apache.spark.SparkContext@6eaa6b0c
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: If you observe the following:
|
|
||||||
> ERROR Shell:397 - Failed to locate the winutils binary in the hadoop binary path
|
|
||||||
> java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
|
|
||||||
|
|
||||||
You can ignore this if you are planning on running Spark in [Standalone mode](https://spark.apache.org/docs/latest/spark-standalone.html). If not, you would have to setup **[WinUtils](https://github.com/steveloughran/winutils)**
|
|
||||||
|
|
||||||
- Download winutils.exe binary from [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
|
|
||||||
- Save winutils.exe binary to a directory of your choice, e.g. c:\hadoop\bin.
|
|
||||||
- Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using command-line:
|
|
||||||
```
|
|
||||||
set HADOOP_HOME=c:\hadoop
|
|
||||||
```
|
|
||||||
- Set PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using command-line:
|
|
||||||
```
|
|
||||||
set PATH=%HADOOP_HOME%\bin;%PATH%
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
6. Install **[WinUtils](https://github.com/steveloughran/winutils)**
|
||||||
|
- Download `winutils.exe` binary from [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
|
||||||
|
- Save `winutils.exe` binary to a directory of your choice e.g., `c:\hadoop\bin`
|
||||||
|
- Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using command-line:
|
||||||
|
```powershell
|
||||||
|
set HADOOP_HOME=c:\hadoop
|
||||||
|
```
|
||||||
|
- Set PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using command-line:
|
||||||
|
```powershell
|
||||||
|
set PATH=%HADOOP_HOME%\bin;%PATH%
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from your command-line before you move to the next section. Feel there is a better way? Please [open an issue](https://github.com/dotnet/spark/issues) and feel free to contribute.
|
Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from your command-line before you move to the next section. Feel there is a better way? Please [open an issue](https://github.com/dotnet/spark/issues) and feel free to contribute.
|
||||||
|
|
||||||
> **Note**: A new instance of the command-line may be required if any environment variables were updated.
|
> **Note**: A new instance of the command-line may be required if any environment variables were updated.
|
||||||
|
@ -86,7 +82,7 @@ Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from
|
||||||
|
|
||||||
For the rest of the section, it is assumed that you have cloned Spark .NET repo into your machine e.g., `c:\github\dotnet-spark\`
|
For the rest of the section, it is assumed that you have cloned Spark .NET repo into your machine e.g., `c:\github\dotnet-spark\`
|
||||||
|
|
||||||
```
|
```powershell
|
||||||
git clone https://github.com/dotnet/spark.git c:\github\dotnet-spark
|
git clone https://github.com/dotnet/spark.git c:\github\dotnet-spark
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -96,7 +92,7 @@ When you submit a .NET application, Spark .NET has the necessary logic written i
|
||||||
|
|
||||||
Regardless of whether you are using .NET Framework or .NET Core, you will need to build the Spark .NET Scala extension layer. This is easy to do:
|
Regardless of whether you are using .NET Framework or .NET Core, you will need to build the Spark .NET Scala extension layer. This is easy to do:
|
||||||
|
|
||||||
```
|
```powershell
|
||||||
cd src\scala
|
cd src\scala
|
||||||
mvn clean package
|
mvn clean package
|
||||||
```
|
```
|
||||||
|
@ -129,8 +125,8 @@ You should see JARs created for the supported Spark versions:
|
||||||
<details>
|
<details>
|
||||||
<summary>📙 Click to see sample console output</summary>
|
<summary>📙 Click to see sample console output</summary>
|
||||||
|
|
||||||
```
|
```powershell
|
||||||
Directory: C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\net461
|
Directory: C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461
|
||||||
|
|
||||||
|
|
||||||
Mode LastWriteTime Length Name
|
Mode LastWriteTime Length Name
|
||||||
|
@ -156,69 +152,55 @@ You should see JARs created for the supported Spark versions:
|
||||||
> Note: We are currently working on automating .NET Core builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually.
|
> Note: We are currently working on automating .NET Core builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually.
|
||||||
|
|
||||||
1. Build the Worker
|
1. Build the Worker
|
||||||
```
|
```powershell
|
||||||
cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\
|
cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\
|
||||||
dotnet publish -f netcoreapp2.1 -r win10-x64
|
dotnet publish -f netcoreapp2.1 -r win10-x64
|
||||||
```
|
|
||||||
<details>
|
|
||||||
<summary>📙 Click to see sample console output</summary>
|
|
||||||
|
|
||||||
```
|
|
||||||
PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
|
|
||||||
Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
|
|
||||||
Copyright (C) Microsoft Corporation. All rights reserved.
|
|
||||||
|
|
||||||
Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj...
|
|
||||||
Restore completed in 37.29 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
|
|
||||||
Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.props.
|
|
||||||
Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.targets.
|
|
||||||
Restore completed in 230.49 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
|
|
||||||
Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
|
|
||||||
Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
|
|
||||||
Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\publish\
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
|
||||||
2. Build the Samples
|
|
||||||
```
|
```
|
||||||
cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\
|
|
||||||
Get-Content .\Microsoft.Spark.CSharp.Examples.csproj | Where-Object {$_ -notmatch 'Microsoft.Spark.Worker.csproj'} | Set-Content .\Microsoft.Spark.CSharp.Examples.Patched.csproj
|
|
||||||
dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
|
|
||||||
```
|
|
||||||
Note the creation of a new patched `.csproj` file. This is due to a bug in .NET Core CLI that causes problems with building a dependency project that creates executables and we are working with the .NET team towards resolving this.
|
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary>📙 Click to see sample console output</summary>
|
<summary>📙 Click to see sample console output</summary>
|
||||||
|
|
||||||
```
|
|
||||||
PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
|
|
||||||
Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
|
|
||||||
Copyright (C) Microsoft Corporation. All rights reserved.
|
|
||||||
|
|
||||||
Restoring packages for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj...
|
```powershell
|
||||||
Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj...
|
PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
|
||||||
Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
|
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
|
||||||
Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark\obj\Microsoft.Spark.csproj.nuget.g.props.
|
Copyright (C) Microsoft Corporation. All rights reserved.
|
||||||
Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
|
|
||||||
Restore completed in 208.34 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj.
|
Restore completed in 299.95 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
|
||||||
Restore completed in 208.34 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
|
Restore completed in 306.62 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
|
||||||
Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
|
Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
|
||||||
Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
|
Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
|
||||||
Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\publish\
|
Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\
|
||||||
```
|
```
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
3. Manually copy Worker binaries into the Samples output location.
|
2. Build the Samples
|
||||||
```
|
```powershell
|
||||||
cp c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\* C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
|
cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\
|
||||||
```
|
dotnet publish -f netcoreapp2.1 -r win10-x64
|
||||||
|
```
|
||||||
|
<details>
|
||||||
|
<summary>📙 Click to see sample console output</summary>
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64
|
||||||
|
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
|
||||||
|
Copyright (C) Microsoft Corporation. All rights reserved.
|
||||||
|
|
||||||
|
Restore completed in 44.22 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
|
||||||
|
Restore completed in 336.94 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.csproj.
|
||||||
|
Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
|
||||||
|
Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
|
||||||
|
Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
# Run Samples
|
# Run Samples
|
||||||
|
|
||||||
Once you build the samples, running them will be through `spark-submit` regardless of whether you are targeting .NET Framework or .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
|
Once you build the samples, running them will be through `spark-submit` regardless of whether you are targeting .NET Framework or .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
|
||||||
|
|
||||||
1. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
|
1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
|
||||||
2. Running your app follows the basic structure:
|
2. Open PowerShell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
|
||||||
|
3. Running your app follows the basic structure:
|
||||||
```powershell
|
```powershell
|
||||||
spark-submit.cmd `
|
spark-submit.cmd `
|
||||||
[--jars <any-jars-your-app-is-dependent-on>] `
|
[--jars <any-jars-your-app-is-dependent-on>] `
|
||||||
|
|
|
@ -9,7 +9,6 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
|
|
||||||
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
|
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
|
|
||||||
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
|
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|
|
@ -1,44 +1,44 @@
|
||||||
// Licensed to the .NET Foundation under one or more agreements.
|
// Licensed to the .NET Foundation under one or more agreements.
|
||||||
// The .NET Foundation licenses this file to you under the MIT license.
|
// The .NET Foundation licenses this file to you under the MIT license.
|
||||||
// See the LICENSE file in the project root for more information.
|
// See the LICENSE file in the project root for more information.
|
||||||
|
|
||||||
using Microsoft.Spark.E2ETest.Utils;
|
using Microsoft.Spark.E2ETest.Utils;
|
||||||
using Xunit;
|
using Xunit;
|
||||||
|
|
||||||
namespace Microsoft.Spark.E2ETest.IpcTests
|
namespace Microsoft.Spark.E2ETest.IpcTests
|
||||||
{
|
{
|
||||||
[Collection("Spark E2E Tests")]
|
[Collection("Spark E2E Tests")]
|
||||||
public class SparkContextTests
|
public class SparkContextTests
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Test signatures for APIs up to Spark 2.3.*.
|
/// Test signatures for APIs up to Spark 2.3.*.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <remarks>
|
/// <remarks>
|
||||||
/// For the RDD related tests, refer to <see cref="RDDTests"/>.
|
/// For the RDD related tests, refer to <see cref="RDDTests"/>.
|
||||||
/// </remarks>
|
/// </remarks>
|
||||||
[Fact]
|
[Fact]
|
||||||
public void TestSignaturesV2_3_X()
|
public void TestSignaturesV2_3_X()
|
||||||
{
|
{
|
||||||
SparkContext sc = SparkContext.GetOrCreate(new SparkConf());
|
SparkContext sc = SparkContext.GetOrCreate(new SparkConf());
|
||||||
|
|
||||||
_ = sc.GetConf();
|
_ = sc.GetConf();
|
||||||
_ = sc.DefaultParallelism;
|
_ = sc.DefaultParallelism;
|
||||||
|
|
||||||
sc.SetJobDescription("job description");
|
sc.SetJobDescription("job description");
|
||||||
|
|
||||||
sc.SetJobGroup("group id", "description");
|
sc.SetJobGroup("group id", "description");
|
||||||
sc.SetJobGroup("group id", "description", true);
|
sc.SetJobGroup("group id", "description", true);
|
||||||
|
|
||||||
sc.ClearJobGroup();
|
sc.ClearJobGroup();
|
||||||
|
|
||||||
string filePath = $"{TestEnvironment.ResourceDirectory}people.txt";
|
string filePath = $"{TestEnvironment.ResourceDirectory}people.txt";
|
||||||
sc.AddFile(filePath);
|
sc.AddFile(filePath);
|
||||||
sc.AddFile(filePath, true);
|
sc.AddFile(filePath, true);
|
||||||
|
|
||||||
using (var tempDir = new TemporaryDirectory())
|
using (var tempDir = new TemporaryDirectory())
|
||||||
{
|
{
|
||||||
sc.SetCheckpointDir(TestEnvironment.ResourceDirectory);
|
sc.SetCheckpointDir(TestEnvironment.ResourceDirectory);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,63 +1,63 @@
|
||||||
// Licensed to the .NET Foundation under one or more agreements.
|
// Licensed to the .NET Foundation under one or more agreements.
|
||||||
// The .NET Foundation licenses this file to you under the MIT license.
|
// The .NET Foundation licenses this file to you under the MIT license.
|
||||||
// See the LICENSE file in the project root for more information.
|
// See the LICENSE file in the project root for more information.
|
||||||
|
|
||||||
using System;
|
using System;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
|
||||||
namespace Microsoft.Spark.E2ETest.Utils
|
namespace Microsoft.Spark.E2ETest.Utils
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Creates a temporary folder that is automatically cleaned up when disposed.
|
/// Creates a temporary folder that is automatically cleaned up when disposed.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
internal sealed class TemporaryDirectory : IDisposable
|
internal sealed class TemporaryDirectory : IDisposable
|
||||||
{
|
{
|
||||||
private bool disposed = false;
|
private bool disposed = false;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Path to temporary folder.
|
/// Path to temporary folder.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string Path { get; }
|
public string Path { get; }
|
||||||
|
|
||||||
public TemporaryDirectory()
|
public TemporaryDirectory()
|
||||||
{
|
{
|
||||||
Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString());
|
Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString());
|
||||||
Cleanup();
|
Cleanup();
|
||||||
Directory.CreateDirectory(Path);
|
Directory.CreateDirectory(Path);
|
||||||
Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}";
|
Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}";
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Dispose()
|
public void Dispose()
|
||||||
{
|
{
|
||||||
Dispose(true);
|
Dispose(true);
|
||||||
GC.SuppressFinalize(this);
|
GC.SuppressFinalize(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void Cleanup()
|
private void Cleanup()
|
||||||
{
|
{
|
||||||
if (File.Exists(Path))
|
if (File.Exists(Path))
|
||||||
{
|
{
|
||||||
File.Delete(Path);
|
File.Delete(Path);
|
||||||
}
|
}
|
||||||
else if (Directory.Exists(Path))
|
else if (Directory.Exists(Path))
|
||||||
{
|
{
|
||||||
Directory.Delete(Path, true);
|
Directory.Delete(Path, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void Dispose(bool disposing)
|
private void Dispose(bool disposing)
|
||||||
{
|
{
|
||||||
if (disposed)
|
if (disposed)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (disposing)
|
if (disposing)
|
||||||
{
|
{
|
||||||
Cleanup();
|
Cleanup();
|
||||||
}
|
}
|
||||||
|
|
||||||
disposed = true;
|
disposed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,10 +33,6 @@ namespace Microsoft.Spark.Services
|
||||||
|
|
||||||
private string _workerPath;
|
private string _workerPath;
|
||||||
|
|
||||||
// Note that the following is only for the backward compatibility and
|
|
||||||
// will be removed after the next release.
|
|
||||||
private const string WorkerPathSettingKey = "DotnetWorkerPath";
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Returns the port number for socket communication between JVM and CLR.
|
/// Returns the port number for socket communication between JVM and CLR.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -66,10 +62,7 @@ namespace Microsoft.Spark.Services
|
||||||
return _workerPath;
|
return _workerPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note that the "WorkerPathSettingKey" is only for backward compatibility and
|
string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName);
|
||||||
// will be removed after the next release.
|
|
||||||
string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName) ??
|
|
||||||
Environment.GetEnvironmentVariable(WorkerPathSettingKey);
|
|
||||||
|
|
||||||
// If the WorkerDirEnvVarName environment variable is set, the worker path is constructed
|
// If the WorkerDirEnvVarName environment variable is set, the worker path is constructed
|
||||||
// based on it.
|
// based on it.
|
||||||
|
@ -80,17 +73,6 @@ namespace Microsoft.Spark.Services
|
||||||
return _workerPath;
|
return _workerPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the WorkerDirEnvVarName environment variable is not set, the worker path is
|
|
||||||
// constructed based on the current assembly's directory. This requires that the worker
|
|
||||||
// executable be present.
|
|
||||||
workerDir = Path.GetDirectoryName(GetType().Assembly.Location);
|
|
||||||
_workerPath = Path.Combine(workerDir, s_procFileName);
|
|
||||||
if (File.Exists(_workerPath))
|
|
||||||
{
|
|
||||||
_logger.LogDebug($"Using the current assembly path to construct .NET worker path: {_workerPath}.");
|
|
||||||
return _workerPath;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise, the worker executable name is returned, meaning it should be on PATH.
|
// Otherwise, the worker executable name is returned, meaning it should be on PATH.
|
||||||
_workerPath = s_procFileName;
|
_workerPath = s_procFileName;
|
||||||
return _workerPath;
|
return _workerPath;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче