Mirror of https://github.com/dotnet/spark.git
Update the logic for Microsoft.Spark.Worker path discovery (#134)
Parent: c3a65a24ee
Commit: f1c5b86d84
@@ -22,10 +22,10 @@ Follow the instructions for registration and download the tool to local disk wit
 dbgen.exe -vf -s 300
 ```
 *Note*: Since there is no parallelization option for TPC-H dbgen, generating a 300GB dataset could take up to 40 hours to complete.

 - After database population generation is completed, there should be 8 tables (customer, lineitem, nation, orders, part, partsupp, region, supplier) created with the .tbl extension.

 4. Convert the TPC-H dataset to parquet format.

 - You can use a simple Spark [application](https://github.com/dotnet/spark/blob/master/benchmark/scala/src/main/scala/com/microsoft/tpch/ConvertTpchCsvToParquetApp.scala) to convert the TPC-H dataset to parquet format. You can run the following spark-submit command to submit the application; adjust it as needed following the [submitting applications](https://spark.apache.org/docs/latest/submitting-applications.html) guide.
 ```
 <spark-submit> --master local[*] --class com.microsoft.tpch.ConvertTpchCsvToParquetApp microsoft-spark-benchmark-<version>.jar <path-to-source-directory-with-TPCH-tables> <path-to-destination-directory-to-save-parquet-file>
 ```
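For orientation, the conversion the referenced Scala app performs amounts to reading the `|`-delimited `.tbl` files and writing them back out as parquet. A minimal sketch of the same idea using the Microsoft.Spark C# API (the table and path handling are illustrative assumptions, not the benchmark app itself):

```csharp
using Microsoft.Spark.Sql;

class ConvertTpchToParquet
{
    static void Main(string[] args)
    {
        // args[0]: directory with the generated .tbl files; args[1]: parquet output directory.
        SparkSession spark = SparkSession.Builder().AppName("tpch-to-parquet").GetOrCreate();

        // TPC-H dbgen emits '|'-delimited text files without a header row.
        DataFrame lineitem = spark.Read()
            .Option("sep", "|")
            .Option("inferSchema", true)
            .Csv($"{args[0]}/lineitem.tbl");

        // Repeat for the other seven tables as needed.
        lineitem.Write().Mode("overwrite").Parquet($"{args[1]}/lineitem");
        spark.Stop();
    }
}
```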
@@ -10,7 +10,6 @@
 
 <ItemGroup>
   <ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Experimental\Microsoft.Spark.Experimental.csproj" />
-  <ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
   <ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
 </ItemGroup>
 
@@ -9,8 +9,8 @@ CSHARP_DLL=$6
 JAR_PATH=$7
 CSHARP_EXECUTABLE=$8
 DATA_PATH=$9
-NUM_ITERATION=$10
-IS_SQL=$11
+NUM_ITERATION=${10}
+IS_SQL=${11}
 
 for i in {1..22}
 do
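The fix above is needed because the shell parses `$10` as `${1}` followed by a literal `0`, so positional parameters beyond `$9` must be written with braces. A quick illustration (standalone snippet, not part of the script):

```bash
set -- a b c d e f g h i j k
echo "$10"    # prints "a0": ${1} followed by a literal 0
echo "${10}"  # prints "j": the tenth positional parameter
```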
@@ -48,7 +48,7 @@ IFS='-' read -ra BASE_FILENAME <<< "$(basename $SRC_WORKER_PATH_OR_URI .tar.gz)"
 VERSION=${BASE_FILENAME[2]}
 
 IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
-[[ ${#VERSION[@]} != 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
+[[ ${#VERSION_CHECK[@]} == 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
 
 # Path of the final destination for the worker binaries
 # (the one we just downloaded and extracted)
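For context, `IFS='.' read -ra VERSION_CHECK <<< "$VERSION"` splits the version string on dots into an array, so the corrected guard asserts the version has exactly three components. The old guard measured `${#VERSION[@]}`, which is always 1 for a plain scalar, so `1 != 3` was always true and the error branch never ran. A standalone illustration (hypothetical version values):

```bash
VERSION="0.2.0"
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
echo "${#VERSION_CHECK[@]}"   # 3 -> passes the new check
echo "${#VERSION[@]}"         # 1 -> what the old check compared against 3

VERSION="nightly"
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
echo "${#VERSION_CHECK[@]}"   # 1 -> the new check fails, as intended
```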
@@ -119,86 +119,56 @@ You should see JARs created for the supported Spark versions:
 ## Building .NET Sample Applications using .NET Core CLI
 
 1. Build the Worker
 ```bash
 cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/
 dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
 ```
 <details>
 <summary>📙 Click to see sample console output</summary>
 
-```
-user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
-
-Welcome to .NET Core!
----------------------
-Learn more about .NET Core: https://aka.ms/dotnet-docs
-Use 'dotnet --help' to see available commands or visit: https://aka.ms/dotnet-cli-docs
-
-...
-output omitted
-...
-
-Restore completed in 20.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
-Installing runtime.linux-x64.Microsoft.NETCore.DotNetAppHost 2.1.9.
-Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostResolver 2.1.9.
-Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostPolicy 2.1.9.
-Installing runtime.linux-x64.Microsoft.NETCore.App 2.1.9.
-Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.props.
-Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.targets.
-Restore completed in 37.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
-Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
-Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
-Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
-```
+```bash
+user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
+Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+Copyright (C) Microsoft Corporation. All rights reserved.
+
+Restore completed in 36.03 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
+Restore completed in 35.94 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
+Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
+Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
+Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
+```
 
 </details>
 2. Build the Samples
-
-**.NET Core 2.1.x**
-Due to a bug in the .NET Core 2.1.x CLI that causes problems with building a dependency project that creates executables, we have to resort to modifying the `.csproj` file. We are working with the .NET team towards resolving this.
-```bash
-cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
-cat Microsoft.Spark.CSharp.Examples.csproj | grep -v "Microsoft.Spark.Worker.csproj" > Microsoft.Spark.CSharp.Examples.Patched.csproj
-dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
-```
-
-**.NET Core 3.x**
-If you are using .NET Core 3.x, you can avoid creating a new patched `.csproj` file and instead compile the project directly:
-```
-cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
-dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.csproj
-```
+```bash
+cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
+dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
+```
 <details>
 <summary>📙 Click to see sample console output</summary>
 
-```
-user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
-Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
-Copyright (C) Microsoft Corporation. All rights reserved.
-
-Restoring packages for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj...
-Restore completed in 53 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
-Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
-Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
-Restore completed in 305.72 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj.
-Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
-Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
-Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
-```
-
-</details>
-3. Manually copy Worker binaries into the Samples output location.
-```
-cp ~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/* ~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
-```
+```bash
+user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
+Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+Copyright (C) Microsoft Corporation. All rights reserved.
+
+Restore completed in 37.11 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
+Restore completed in 281.63 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj.
+Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
+Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
+Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
+```
 
 </details>
 
 # Run Samples
 
 Once you build the samples, you can use `spark-submit` to submit your .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
 
-1. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
-2. Running your app follows the basic structure:
+1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
+2. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
+3. Running your app follows the basic structure:
 ```bash
 spark-submit \
 [--jars <any-jars-your-app-is-dependent-on>] \
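As a concrete instance of the `DOTNET_WORKER_DIR` step above (a sketch: the path is the publish directory from this doc's sample output, so adjust it to wherever you published the Worker):

```bash
# Make the published Microsoft.Spark.Worker discoverable to Spark executors.
export DOTNET_WORKER_DIR=~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish
```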
@@ -42,7 +42,7 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
 - Verify you are able to run `spark-shell` from your command-line
   <details>
   <summary>📙 Click to see sample console output</summary>
 
   ```
   Welcome to
         ____              __
@@ -58,26 +58,22 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
 scala> sc
 res0: org.apache.spark.SparkContext = org.apache.spark.SparkContext@6eaa6b0c
 ```
 
-Note: If you observe the following:
-> ERROR Shell:397 - Failed to locate the winutils binary in the hadoop binary path
-> java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
-
-You can ignore this if you are planning on running Spark in [Standalone mode](https://spark.apache.org/docs/latest/spark-standalone.html). If not, you would have to set up **[WinUtils](https://github.com/steveloughran/winutils)**:
-
-- Download the winutils.exe binary from the [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
-- Save the winutils.exe binary to a directory of your choice, e.g. c:\hadoop\bin.
-- Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using the command-line:
-  ```
-  set HADOOP_HOME=c:\hadoop
-  ```
-- Set the PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using the command-line:
-  ```
-  set PATH=%HADOOP_HOME%\bin;%PATH%
-  ```
-
 </details>
 
+6. Install **[WinUtils](https://github.com/steveloughran/winutils)**
+   - Download the `winutils.exe` binary from the [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
+   - Save the `winutils.exe` binary to a directory of your choice, e.g., `c:\hadoop\bin`
+   - Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using the command-line:
+     ```powershell
+     set HADOOP_HOME=c:\hadoop
+     ```
+   - Set the PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using the command-line:
+     ```powershell
+     set PATH=%HADOOP_HOME%\bin;%PATH%
+     ```
 
 Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from your command-line before you move to the next section. Feel there is a better way? Please [open an issue](https://github.com/dotnet/spark/issues) and feel free to contribute.
 
 > **Note**: A new instance of the command-line may be required if any environment variables were updated.
@@ -86,7 +82,7 @@ Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from
 
 For the rest of the section, it is assumed that you have cloned the Spark .NET repo onto your machine, e.g., `c:\github\dotnet-spark\`
 
-```
+```powershell
 git clone https://github.com/dotnet/spark.git c:\github\dotnet-spark
 ```
 
@@ -96,7 +92,7 @@ When you submit a .NET application, Spark .NET has the necessary logic written i
 
 Regardless of whether you are using .NET Framework or .NET Core, you will need to build the Spark .NET Scala extension layer. This is easy to do:
 
-```
+```powershell
 cd src\scala
 mvn clean package
 ```
@@ -129,8 +125,8 @@ You should see JARs created for the supported Spark versions:
 <details>
 <summary>📙 Click to see sample console output</summary>
 
-```
-    Directory: C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\net461
+```powershell
+    Directory: C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461
 
 
 Mode                LastWriteTime         Length Name
@@ -156,69 +152,55 @@ You should see JARs created for the supported Spark versions:
 > Note: We are currently working on automating .NET Core builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually.
 
 1. Build the Worker
-```
+```powershell
 cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\
 dotnet publish -f netcoreapp2.1 -r win10-x64
 ```
 <details>
 <summary>📙 Click to see sample console output</summary>
 
-```
-PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
-Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
-Copyright (C) Microsoft Corporation. All rights reserved.
-
-Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj...
-Restore completed in 37.29 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
-Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.props.
-Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.targets.
-Restore completed in 230.49 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
-Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
-Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
-Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\publish\
-```
+```powershell
+PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
+Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+Copyright (C) Microsoft Corporation. All rights reserved.
+
+Restore completed in 299.95 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
+Restore completed in 306.62 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
+Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
+Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
+Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\
+```
 
 </details>
 2. Build the Samples
 ```powershell
 cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\
-Get-Content .\Microsoft.Spark.CSharp.Examples.csproj | Where-Object {$_ -notmatch 'Microsoft.Spark.Worker.csproj'} | Set-Content .\Microsoft.Spark.CSharp.Examples.Patched.csproj
-dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
+dotnet publish -f netcoreapp2.1 -r win10-x64
 ```
-Note the creation of a new patched `.csproj` file. This is due to a bug in the .NET Core CLI that causes problems with building a dependency project that creates executables; we are working with the .NET team towards resolving this.
-
 <details>
 <summary>📙 Click to see sample console output</summary>
 
-```
-PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
-Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
-Copyright (C) Microsoft Corporation. All rights reserved.
-
-Restoring packages for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj...
-Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj...
-Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
-Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark\obj\Microsoft.Spark.csproj.nuget.g.props.
-Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
-Restore completed in 208.34 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj.
-Restore completed in 208.34 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
-Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
-Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
-Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\publish\
-```
-
-</details>
-3. Manually copy Worker binaries into the Samples output location.
-```
-cp c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\* C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
-```
+```powershell
+PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64
+Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
+Copyright (C) Microsoft Corporation. All rights reserved.
+
+Restore completed in 44.22 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
+Restore completed in 336.94 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.csproj.
+Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
+Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
+Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
+```
 
 </details>
 
 # Run Samples
 
 Once you build the samples, running them will be through `spark-submit` regardless of whether you are targeting .NET Framework or .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
 
-1. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
-2. Running your app follows the basic structure:
+1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
+2. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
+3. Running your app follows the basic structure:
 ```powershell
 spark-submit.cmd `
 [--jars <any-jars-your-app-is-dependent-on>] `
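Likewise on Windows, a sketch of the `DOTNET_WORKER_DIR` step above, mirroring this doc's cmd-style `set` examples (swap in the `net461` path for .NET Framework):

```powershell
set DOTNET_WORKER_DIR=c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish
```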
@@ -9,7 +9,6 @@
 </PropertyGroup>
 
 <ItemGroup>
-  <ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
   <ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
 </ItemGroup>
 
@@ -21,7 +21,6 @@
 </ItemGroup>
 
 <ItemGroup>
-  <ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
   <ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
 </ItemGroup>
 
@@ -1,44 +1,44 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
 using Microsoft.Spark.E2ETest.Utils;
 using Xunit;
 
 namespace Microsoft.Spark.E2ETest.IpcTests
 {
     [Collection("Spark E2E Tests")]
     public class SparkContextTests
     {
         /// <summary>
         /// Test signatures for APIs up to Spark 2.3.*.
         /// </summary>
         /// <remarks>
         /// For the RDD related tests, refer to <see cref="RDDTests"/>.
         /// </remarks>
         [Fact]
         public void TestSignaturesV2_3_X()
         {
             SparkContext sc = SparkContext.GetOrCreate(new SparkConf());
 
             _ = sc.GetConf();
             _ = sc.DefaultParallelism;
 
             sc.SetJobDescription("job description");
 
             sc.SetJobGroup("group id", "description");
             sc.SetJobGroup("group id", "description", true);
 
             sc.ClearJobGroup();
 
             string filePath = $"{TestEnvironment.ResourceDirectory}people.txt";
             sc.AddFile(filePath);
             sc.AddFile(filePath, true);
 
-            sc.SetCheckpointDir(TestEnvironment.ResourceDirectory);
+            using (var tempDir = new TemporaryDirectory())
+            {
+                sc.SetCheckpointDir(tempDir.Path);
+            }
         }
     }
 }
@@ -1,63 +1,63 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
 using System;
 using System.IO;
 
 namespace Microsoft.Spark.E2ETest.Utils
 {
     /// <summary>
     /// Creates a temporary folder that is automatically cleaned up when disposed.
     /// </summary>
     internal sealed class TemporaryDirectory : IDisposable
     {
         private bool disposed = false;
 
         /// <summary>
         /// Path to temporary folder.
         /// </summary>
         public string Path { get; }
 
         public TemporaryDirectory()
         {
             Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString());
             Cleanup();
             Directory.CreateDirectory(Path);
             Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}";
         }
 
         public void Dispose()
         {
             Dispose(true);
             GC.SuppressFinalize(this);
         }
 
         private void Cleanup()
         {
             if (File.Exists(Path))
             {
                 File.Delete(Path);
             }
             else if (Directory.Exists(Path))
             {
                 Directory.Delete(Path, true);
             }
         }
 
         private void Dispose(bool disposing)
         {
             if (disposed)
             {
                 return;
             }
 
             if (disposing)
             {
                 Cleanup();
             }
 
             disposed = true;
         }
     }
 }
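The class above is the standard `IDisposable` pattern wrapped around a scratch folder: the constructor creates a fresh GUID-named directory under the system temp path, and `Dispose` deletes it recursively. A minimal usage sketch (hypothetical caller, not part of this commit):

```csharp
using System.IO;
using Microsoft.Spark.E2ETest.Utils;

// The folder exists for the lifetime of the using block and is
// deleted (recursively) when Dispose runs at the closing brace.
using (var tempDir = new TemporaryDirectory())
{
    // tempDir.Path already ends with a directory separator.
    File.WriteAllText(Path.Combine(tempDir.Path, "scratch.txt"), "data");
}
```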
@@ -33,10 +33,6 @@ namespace Microsoft.Spark.Services
 
         private string _workerPath;
 
-        // Note that the following is only for the backward compatibility and
-        // will be removed after the next release.
-        private const string WorkerPathSettingKey = "DotnetWorkerPath";
-
         /// <summary>
         /// Returns the port number for socket communication between JVM and CLR.
         /// </summary>
@@ -66,10 +62,7 @@ namespace Microsoft.Spark.Services
                 return _workerPath;
             }
 
-            // Note that the "WorkerPathSettingKey" is only for the backward compatibility and
-            // will be removed after the next release.
-            string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName) ??
-                Environment.GetEnvironmentVariable(WorkerPathSettingKey);
+            string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName);
 
             // If the WorkerDirEnvName environment variable is set, the worker path is constructed
             // based on it.
@@ -80,17 +73,6 @@ namespace Microsoft.Spark.Services
                 return _workerPath;
             }
 
-            // If the WorkerDirEnvName environment variable is not set, the worker path is
-            // constructed based on the current assembly's directory. This requires that the
-            // worker executable is present.
-            workerDir = Path.GetDirectoryName(GetType().Assembly.Location);
-            _workerPath = Path.Combine(workerDir, s_procFileName);
-            if (File.Exists(_workerPath))
-            {
-                _logger.LogDebug($"Using the current assembly path to construct .NET worker path: {_workerPath}.");
-                return _workerPath;
-            }
-
             // Otherwise, the worker executable name is returned, meaning it should be on the PATH.
             _workerPath = s_procFileName;
             return _workerPath;
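Putting the three hunks together, the discovery order after this commit is: (1) the `DOTNET_WORKER_DIR` environment variable, else (2) the bare executable name, which the OS resolves against `PATH`; the assembly-directory fallback and the legacy `DotnetWorkerPath` setting are gone. A condensed sketch of that flow (simplified names; the real `ConfigurationService` also caches the result and logs):

```csharp
using System;
using System.IO;

// Simplified sketch of the post-commit lookup; not the repo class verbatim.
internal static class WorkerPathSketch
{
    public static string GetWorkerPath(string procFileName)
    {
        string workerDir = Environment.GetEnvironmentVariable("DOTNET_WORKER_DIR");

        // 1) If DOTNET_WORKER_DIR is set, the worker path is constructed from it.
        if (!string.IsNullOrEmpty(workerDir))
        {
            return Path.Combine(workerDir, procFileName);
        }

        // 2) Otherwise the bare executable name is returned, so the OS
        //    resolves it against the PATH environment variable.
        return procFileName;
    }
}
```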