Update the logic for Microsoft.Spark.Worker path discovery (#134)

This commit is contained in:
Steve Suh 2019-06-13 16:04:53 -07:00, committed by Terry Kim
Parent c3a65a24ee
Commit f1c5b86d84
11 changed files: 202 additions and 271 deletions

View file

@@ -22,10 +22,10 @@ Follow the instructions for registration and download the tool to local disk wit
   dbgen.exe -vf -s 300
   ```
   *Note*: Since there is no parallelization option for TPC-H dbgen, generating a 300GB dataset could take up to 40 hours to complete.
   - After data generation completes, there should be 8 tables (customer, lineitem, nation, orders, part, partsupp, region, supplier) created with the .tbl extension.
4. Convert the TPC-H dataset to parquet format.
   - You can use a simple Spark [application](https://github.com/dotnet/spark/blob/master/benchmark/scala/src/main/scala/com/microsoft/tpch/ConvertTpchCsvToParquetApp.scala) to convert the TPC-H dataset to parquet format. You can run the following spark-submit command to submit the application, and adjust it to match the format described in [submitting applications](https://spark.apache.org/docs/latest/submitting-applications.html).
   ```
   <spark-submit> --master local[*] --class com.microsoft.tpch.ConvertTpchCsvToParquetApp microsoft-spark-benchmark-<version>.jar <path-to-source-directory-with-TPCH-tables> <path-to-destination-directory-to-save-parquet-file>

View file

@@ -10,7 +10,6 @@
  <ItemGroup>
    <ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Experimental\Microsoft.Spark.Experimental.csproj" />
-    <ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
    <ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
  </ItemGroup>

View file

@@ -9,8 +9,8 @@ CSHARP_DLL=$6
JAR_PATH=$7
CSHARP_EXECUTABLE=$8
DATA_PATH=$9
-NUM_ITERATION=$10
-IS_SQL=$11
+NUM_ITERATION=${10}
+IS_SQL=${11}

for i in {1..22}
do

(In bash, positional parameters past `$9` require braces: `$10` expands as `$1` followed by a literal `0`.)

View file

@@ -48,7 +48,7 @@ IFS='-' read -ra BASE_FILENAME <<< "$(basename $SRC_WORKER_PATH_OR_URI .tar.gz)"
VERSION=${BASE_FILENAME[2]}
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
-[[ ${#VERSION[@]} != 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
+[[ ${#VERSION_CHECK[@]} == 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }

# Path of the final destination for the worker binaries
# (the one we just downloaded and extracted)
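For readers less used to bash array syntax, here is a hedged C# rendering of what the corrected check does. The sample file name is hypothetical; the script assumes the version is the third `-`-separated field of the tarball's base name:

```csharp
using System;

class WorkerTarballVersionCheck
{
    static void Main()
    {
        // Hypothetical tarball base name; the real layout comes from the script's input.
        string baseFileName = "Microsoft.Spark.Worker-netcoreapp2.1-0.3.0";

        // VERSION=${BASE_FILENAME[2]}: the third '-'-separated field.
        string version = baseFileName.Split('-')[2];

        // IFS='.' read -ra VERSION_CHECK: split the version on '.'.
        string[] versionCheck = version.Split('.');

        // The fix: validate the split version (VERSION_CHECK, three components),
        // not the scalar VERSION, whose array length is always 1.
        if (versionCheck.Length != 3)
        {
            Console.Error.WriteLine("Version check failed. Raise an issue here: https://github.com/dotnet/spark");
            Environment.Exit(1);
        }
    }
}
```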

View file

@@ -119,86 +119,56 @@ You should see JARs created for the supported Spark versions:
## Building .NET Sample Applications using .NET Core CLI

1. Build the Worker
    ```bash
    cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/
    dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
    ```
    <details>
    <summary>&#x1F4D9; Click to see sample console output</summary>
-   ```
-   user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
-   Welcome to .NET Core!
-   ---------------------
-   Learn more about .NET Core: https://aka.ms/dotnet-docs
-   Use 'dotnet --help' to see available commands or visit: https://aka.ms/dotnet-cli-docs
-   ...
-   output omitted
-   ...
-   Restore completed in 20.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
-   Installing runtime.linux-x64.Microsoft.NETCore.DotNetAppHost 2.1.9.
-   Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostResolver 2.1.9.
-   Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostPolicy 2.1.9.
-   Installing runtime.linux-x64.Microsoft.NETCore.App 2.1.9.
-   Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.props.
-   Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.targets.
-   Restore completed in 37.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
-   Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
-   Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
-   Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
-   ```
-   </details>
-2. Build the Samples
-   **.NET Core 2.1.x**
-   Due to a bug in .NET Core 2.1.x CLI that causes problems with building a dependency project that creates executables, we have to resort to modifying the `.csproj` file. We are working with the .NET team towards resolving this.
-   ```
-   cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
-   cat Microsoft.Spark.CSharp.Examples.csproj | grep -v "Microsoft.Spark.Worker.csproj" > Microsoft.Spark.CSharp.Examples.Patched.csproj
-   dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
-   ```
-   **.NET Core 3.x**
-   If you are using .NET Core 3.x, you can avoid creating a new patched `.csproj` file and instead compile the project directly:
-   ```
-   cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
-   dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.csproj
-   ```
-   <details>
-   <summary>&#x1F4D9; Click to see sample console output</summary>
-   ```
-   user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
-   Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
-   Copyright (C) Microsoft Corporation. All rights reserved.
-   Restoring packages for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj...
-   Restore completed in 53 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
-   Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
-   Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
-   Restore completed in 305.72 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj.
-   Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
-   Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
-   Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
-   ```
-   </details>
-3. Manually copy Worker binaries into the Samples output location.
-   ```
-   cp ~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/* ~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
-   ```
+   ```bash
+   user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
+   Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+   Copyright (C) Microsoft Corporation. All rights reserved.
+   Restore completed in 36.03 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
+   Restore completed in 35.94 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
+   Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
+   Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
+   Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
+   ```
+   </details>
+2. Build the Samples
+   ```bash
+   cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
+   dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
+   ```
+   <details>
+   <summary>&#x1F4D9; Click to see sample console output</summary>
+   ```bash
+   user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
+   Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+   Copyright (C) Microsoft Corporation. All rights reserved.
+   Restore completed in 37.11 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
+   Restore completed in 281.63 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj.
+   Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
+   Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
+   Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
+   ```
+   </details>

# Run Samples

Once you build the samples, you can use `spark-submit` to submit your .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.

-1. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
-2. Running your app follows the basic structure:
+1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
+2. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
+3. Running your app follows the basic structure:
    ```bash
    spark-submit \
    [--jars <any-jars-your-app-is-dependent-on>] \

View file

@@ -42,7 +42,7 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
   - Verify you are able to run `spark-shell` from your command-line
     <details>
     <summary>&#x1F4D9; Click to see sample console output</summary>
     ```
     Welcome to
           ____              __
@@ -58,26 +58,22 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
   scala> sc
   res0: org.apache.spark.SparkContext = org.apache.spark.SparkContext@6eaa6b0c
   ```
-  Note: If you observe the following:
-  > ERROR Shell:397 - Failed to locate the winutils binary in the hadoop binary path
-  > java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
-  You can ignore this if you are planning on running Spark in [Standalone mode](https://spark.apache.org/docs/latest/spark-standalone.html). If not, you would have to set up **[WinUtils](https://github.com/steveloughran/winutils)**
-  - Download the winutils.exe binary from the [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
-  - Save the winutils.exe binary to a directory of your choice, e.g. c:\hadoop\bin.
-  - Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using command-line:
-  ```
-  set HADOOP_HOME=c:\hadoop
-  ```
-  - Set the PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using command-line:
-  ```
-  set PATH=%HADOOP_HOME%\bin;%PATH%
-  ```
   </details>
+6. Install **[WinUtils](https://github.com/steveloughran/winutils)**
+   - Download the `winutils.exe` binary from the [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
+   - Save the `winutils.exe` binary to a directory of your choice, e.g., `c:\hadoop\bin`
+   - Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using command-line:
+     ```powershell
+     set HADOOP_HOME=c:\hadoop
+     ```
+   - Set the PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using command-line:
+     ```powershell
+     set PATH=%HADOOP_HOME%\bin;%PATH%
+     ```

Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from your command-line before you move to the next section. Feel there is a better way? Please [open an issue](https://github.com/dotnet/spark/issues) and feel free to contribute.

> **Note**: A new instance of the command-line may be required if any environment variables were updated.
@@ -86,7 +82,7 @@ Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from

For the rest of the section, it is assumed that you have cloned the Spark .NET repo into your machine e.g., `c:\github\dotnet-spark\`

-```
+```powershell
git clone https://github.com/dotnet/spark.git c:\github\dotnet-spark
```
@@ -96,7 +92,7 @@ When you submit a .NET application, Spark .NET has the necessary logic written i

Regardless of whether you are using .NET Framework or .NET Core, you will need to build the Spark .NET Scala extension layer. This is easy to do:

-```
+```powershell
cd src\scala
mvn clean package
```
@@ -129,8 +125,8 @@ You should see JARs created for the supported Spark versions:
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
-```
-    Directory: C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\net461
+```powershell
+    Directory: C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461

Mode                LastWriteTime         Length Name
@@ -156,69 +152,55 @@ You should see JARs created for the supported Spark versions:
> Note: We are currently working on automating .NET Core builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually.

1. Build the Worker
-   ```
+   ```powershell
    cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\
    dotnet publish -f netcoreapp2.1 -r win10-x64
    ```
    <details>
    <summary>&#x1F4D9; Click to see sample console output</summary>
-   ```
-   PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
-   Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
-   Copyright (C) Microsoft Corporation. All rights reserved.
-   Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj...
-   Restore completed in 37.29 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
-   Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.props.
-   Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.targets.
-   Restore completed in 230.49 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
-   Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
-   Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
-   Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\publish\
-   ```
-   </details>
-2. Build the Samples
-   ```
-   cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\
-   Get-Content .\Microsoft.Spark.CSharp.Examples.csproj | Where-Object {$_ -notmatch 'Microsoft.Spark.Worker.csproj'} | Set-Content .\Microsoft.Spark.CSharp.Examples.Patched.csproj
-   dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
-   ```
-   Note the creation of a new patched `.csproj` file. This is due to a bug in the .NET Core CLI that causes problems with building a dependency project that creates executables; we are working with the .NET team towards resolving this.
-   <details>
-   <summary>&#x1F4D9; Click to see sample console output</summary>
-   ```
-   PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
-   Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
-   Copyright (C) Microsoft Corporation. All rights reserved.
-   Restoring packages for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj...
-   Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj...
-   Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
-   Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark\obj\Microsoft.Spark.csproj.nuget.g.props.
-   Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
-   Restore completed in 208.34 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj.
-   Restore completed in 208.34 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
-   Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
-   Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
-   Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\publish\
-   ```
-   </details>
-3. Manually copy Worker binaries into the Samples output location.
-   ```
-   cp c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\* C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
-   ```
+   ```powershell
+   PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
+   Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+   Copyright (C) Microsoft Corporation. All rights reserved.
+   Restore completed in 299.95 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
+   Restore completed in 306.62 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
+   Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
+   Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
+   Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\
+   ```
+   </details>
+2. Build the Samples
+   ```powershell
+   cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\
+   dotnet publish -f netcoreapp2.1 -r win10-x64
+   ```
+   <details>
+   <summary>&#x1F4D9; Click to see sample console output</summary>
+   ```powershell
+   PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64
+   Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+   Copyright (C) Microsoft Corporation. All rights reserved.
+   Restore completed in 44.22 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
+   Restore completed in 336.94 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.csproj.
+   Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
+   Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
+   Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
+   ```
+   </details>

# Run Samples

Once you build the samples, running them will be through `spark-submit` regardless of whether you are targeting .NET Framework or .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.

-1. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
-2. Running your app follows the basic structure:
+1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
+2. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
+3. Running your app follows the basic structure:
    ```powershell
    spark-submit.cmd `
    [--jars <any-jars-your-app-is-dependent-on>] `

View file

@@ -9,7 +9,6 @@
  </PropertyGroup>

  <ItemGroup>
-    <ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
    <ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
  </ItemGroup>

View file

@@ -21,7 +21,6 @@
  </ItemGroup>

  <ItemGroup>
-    <ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
    <ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
  </ItemGroup>

View file

@@ -1,44 +1,44 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.Spark.E2ETest.Utils;
using Xunit;

namespace Microsoft.Spark.E2ETest.IpcTests
{
    [Collection("Spark E2E Tests")]
    public class SparkContextTests
    {
        /// <summary>
        /// Test signatures for APIs up to Spark 2.3.*.
        /// </summary>
        /// <remarks>
        /// For the RDD related tests, refer to <see cref="RDDTests"/>.
        /// </remarks>
        [Fact]
        public void TestSignaturesV2_3_X()
        {
            SparkContext sc = SparkContext.GetOrCreate(new SparkConf());

            _ = sc.GetConf();
            _ = sc.DefaultParallelism;

            sc.SetJobDescription("job description");

            sc.SetJobGroup("group id", "description");
            sc.SetJobGroup("group id", "description", true);
            sc.ClearJobGroup();

            string filePath = $"{TestEnvironment.ResourceDirectory}people.txt";
            sc.AddFile(filePath);
            sc.AddFile(filePath, true);

            using (var tempDir = new TemporaryDirectory())
            {
                // Use the scoped temporary directory so the checkpoint data is cleaned up.
                sc.SetCheckpointDir(tempDir.Path);
            }
        }
    }
}

View file

@@ -1,63 +1,63 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.IO;

namespace Microsoft.Spark.E2ETest.Utils
{
    /// <summary>
    /// Creates a temporary folder that is automatically cleaned up when disposed.
    /// </summary>
    internal sealed class TemporaryDirectory : IDisposable
    {
        private bool disposed = false;

        /// <summary>
        /// Path to temporary folder.
        /// </summary>
        public string Path { get; }

        public TemporaryDirectory()
        {
            Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString());
            Cleanup();
            Directory.CreateDirectory(Path);
            Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}";
        }

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        private void Cleanup()
        {
            if (File.Exists(Path))
            {
                File.Delete(Path);
            }
            else if (Directory.Exists(Path))
            {
                Directory.Delete(Path, true);
            }
        }

        private void Dispose(bool disposing)
        {
            if (disposed)
            {
                return;
            }

            if (disposing)
            {
                Cleanup();
            }

            disposed = true;
        }
    }
}
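As a usage note (not part of this commit's changes): the class is meant to be scoped with `using`, so the folder disappears even if the test throws. A minimal sketch, assuming the class above is in scope (it is internal to the test assembly):

```csharp
using System;
using System.IO;
using Microsoft.Spark.E2ETest.Utils;

class TemporaryDirectoryDemo
{
    static void Main()
    {
        string captured;
        using (var tempDir = new TemporaryDirectory())
        {
            captured = tempDir.Path; // already ends with a directory separator
            File.WriteAllText(captured + "scratch.txt", "checkpoint data");
            Console.WriteLine(Directory.Exists(captured)); // True
        }
        // Dispose ran Cleanup(): the folder and its contents are gone.
        Console.WriteLine(Directory.Exists(captured)); // False
    }
}
```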

View file

@@ -33,10 +33,6 @@ namespace Microsoft.Spark.Services
        private string _workerPath;

-       // Note that the following is only for the backward compatibility and
-       // will be removed after the next release.
-       private const string WorkerPathSettingKey = "DotnetWorkerPath";

        /// <summary>
        /// Returns the port number for socket communication between JVM and CLR.
        /// </summary>
@@ -66,10 +62,7 @@ namespace Microsoft.Spark.Services
                return _workerPath;
            }

-           // Note that the "WorkerPathSettingKey" is only for the backward compatibility and
-           // will be removed after the next release.
-           string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName) ??
-               Environment.GetEnvironmentVariable(WorkerPathSettingKey);
+           string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName);

            // If the WorkerDirEnvName environment variable is set, the worker path is constructed
            // based on it.
@@ -80,17 +73,6 @@ namespace Microsoft.Spark.Services
                return _workerPath;
            }

-           // If the WorkerDirEnvName environment variable is not set, the worker path is
-           // constructed based on the current assembly's directory. This requires the worker
-           // executable is present.
-           workerDir = Path.GetDirectoryName(GetType().Assembly.Location);
-           _workerPath = Path.Combine(workerDir, s_procFileName);
-           if (File.Exists(_workerPath))
-           {
-               _logger.LogDebug($"Using the current assembly path to construct .NET worker path: {_workerPath}.");
-               return _workerPath;
-           }
-
            // Otherwise, the worker executable name is returned, meaning it should be found via PATH.
            _workerPath = s_procFileName;
            return _workerPath;
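The net effect of this commit is a simpler two-step discovery. As a minimal sketch that paraphrases the method above (it reuses the `WorkerDirEnvVarName` and `s_procFileName` fields the diff references, but is not the literal source):

```csharp
// Sketch of the post-change lookup, assuming WorkerDirEnvVarName holds
// "DOTNET_WORKER_DIR" and s_procFileName holds the worker executable name.
private string GetWorkerPathSketch()
{
    // 1. An explicitly configured directory always wins.
    string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName);
    if (!string.IsNullOrEmpty(workerDir))
    {
        return Path.Combine(workerDir, s_procFileName);
    }

    // 2. Otherwise return the bare executable name so the OS resolves it
    //    through PATH at launch time. The assembly-directory probe and the
    //    legacy "DotnetWorkerPath" setting are intentionally gone.
    return s_procFileName;
}
```

This matches the documentation changes above, which now tell users to set `DOTNET_WORKER_DIR` or put the worker publish directory on `PATH` instead of copying worker binaries next to the app.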