diff --git a/benchmark/README.md b/benchmark/README.md
index c50d0ee8..b260dcc6 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -22,10 +22,10 @@ Follow the instructions for registration and download the tool to local disk wit
    dbgen.exe -vf -s 300
    ```
    *Note*: Since there is no parallelization option for TPC-H dbgen, generating a 300GB dataset could take up to 40 hours to complete.
-
+
    - After data generation completes, there should be 8 tables (customer, lineitem, nation, orders, part, partsupp, region, supplier) created with the .tbl extension.
-4. Convert TPC-H dataset to parquet format.
+4. Convert TPC-H dataset to parquet format.
    - You can use a simple Spark [application](https://github.com/dotnet/spark/blob/master/benchmark/scala/src/main/scala/com/microsoft/tpch/ConvertTpchCsvToParquetApp.scala) to convert the TPC-H dataset to parquet format. Run the following spark-submit command to submit the application, adjusting it as needed per the [submitting applications](https://spark.apache.org/docs/latest/submitting-applications.html) guide.
    ```
    --master local[*] --class com.microsoft.tpch.ConvertTpchCsvToParquetApp microsoft-spark-benchmark-.jar
diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj
index 4b9b1bc7..72707d79 100644
--- a/benchmark/csharp/Tpch/Tpch.csproj
+++ b/benchmark/csharp/Tpch/Tpch.csproj
@@ -10,7 +10,6 @@
-
diff --git a/benchmark/run_csharp_benchmark.sh b/benchmark/run_csharp_benchmark.sh
index fe03a3c5..584afae6 100644
--- a/benchmark/run_csharp_benchmark.sh
+++ b/benchmark/run_csharp_benchmark.sh
@@ -9,8 +9,8 @@ CSHARP_DLL=$6
 JAR_PATH=$7
 CSHARP_EXECUTABLE=$8
 DATA_PATH=$9
-NUM_ITERATION=$10
-IS_SQL=$11
+NUM_ITERATION=${10}
+IS_SQL=${11}
 for i in {1..22}
 do
diff --git a/deployment/install-worker.sh b/deployment/install-worker.sh
index 1a3d231f..0fad0da1 100644
--- a/deployment/install-worker.sh
+++ b/deployment/install-worker.sh
@@ -48,7 +48,7 @@ IFS='-' read -ra BASE_FILENAME <<< "$(basename $SRC_WORKER_PATH_OR_URI .tar.gz)"
 VERSION=${BASE_FILENAME[2]}
 IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
-[[ ${#VERSION[@]} != 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
+[[ ${#VERSION_CHECK[@]} == 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
 # Path of the final destination for the worker binaries
 # (the one we just downloaded and extracted)
diff --git a/docs/building/ubuntu-instructions.md b/docs/building/ubuntu-instructions.md
index 0a302743..fd0a32ec 100644
--- a/docs/building/ubuntu-instructions.md
+++ b/docs/building/ubuntu-instructions.md
@@ -119,86 +119,56 @@ You should see JARs created for the supported Spark versions:
 ## Building .NET Sample Applications using .NET Core CLI
 1. Build the Worker
-   ```bash
-   cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/
-   dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
-   ```
-
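A note on the two shell fixes above: in Bash, positional parameters past `$9` require braces, and an array-length test must inspect the variable actually produced by `read -ra`; the old `install-worker.sh` check measured a plain string and so could never fail. A minimal, self-contained sketch (illustration only, not part of the repo):

```bash
#!/usr/bin/env bash
# Why ${10} needs braces: $10 is parsed as ${1} followed by a literal "0".
set -- a b c d e f g h i j   # ten positional parameters
echo "$10"                   # prints "a0"
echo "${10}"                 # prints "j" (the tenth parameter)

# Why the old version check was a no-op: VERSION is a plain string, so
# ${#VERSION[@]} is always 1, "1 != 3" is always true, and the || branch
# never ran. Splitting into VERSION_CHECK and testing for 3 parts works.
VERSION="0.4.0"              # sample value for illustration
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
[[ ${#VERSION_CHECK[@]} == 3 ]] || echo "not an x.y.z version" >&2
```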
- 📙 Click to see sample console output - - ``` - user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 - - Welcome to .NET Core! - --------------------- - Learn more about .NET Core: https://aka.ms/dotnet-docs - Use 'dotnet --help' to see available commands or visit: https://aka.ms/dotnet-cli-docs - - ... - output omitted - ... - - Restore completed in 20.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj. - Installing runtime.linux-x64.Microsoft.NETCore.DotNetAppHost 2.1.9. - Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostResolver 2.1.9. - Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostPolicy 2.1.9. - Installing runtime.linux-x64.Microsoft.NETCore.App 2.1.9. - Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.props. - Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.targets. - Restore completed in 37.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj. - Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll - Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll - Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/ - ``` - -
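Returning to step 4 of the benchmark README above: a fuller `spark-submit` invocation might look like the sketch below. The jar version is elided upstream, and the application arguments (input and output paths) are assumptions here; check `ConvertTpchCsvToParquetApp.scala` for the exact parameter list.

```bash
# Sketch only: <version> and both paths are placeholders, and the argument
# list is an assumption -- consult ConvertTpchCsvToParquetApp.scala.
spark-submit \
  --master local[*] \
  --class com.microsoft.tpch.ConvertTpchCsvToParquetApp \
  microsoft-spark-benchmark-<version>.jar \
  /path/to/dbgen-output /path/to/tpch-parquet
```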
- 2. Build the Samples - - **.NET Core 2.1.x** - Due to a bug in .NET Core 2.1.x CLI that causes problems with building a dependency project that creates executables, we have to resort to modifying the `.csproj` file. We are working with the .NET team towards resolving this. + ```bash + cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/ + dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 ``` - cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/ - cat Microsoft.Spark.CSharp.Examples.csproj | grep -v "Microsoft.Spark.Worker.csproj" > Microsoft.Spark.CSharp.Examples.Patched.csproj - dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj - ``` - - **.NET Core 3.x** - If you are using .NET Core 3.x, you can avoid creating a new patched `.csproj` file and instead compile the project directly: - ``` - cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/ - dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.csproj - ``` -
📙 Click to see sample console output - + + ```bash + user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 + Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core + Copyright (C) Microsoft Corporation. All rights reserved. + + Restore completed in 36.03 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj. + Restore completed in 35.94 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj. + Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll + Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll + Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/ ``` - user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj - Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core + +
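If the publish step above succeeds, the worker lands under the `artifacts/` tree shown in the sample output. A quick sanity check, assuming the default Debug configuration from that output:

```bash
# Verify the published worker binary exists and is executable.
WORKER_PUBLISH=~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish
ls -l "$WORKER_PUBLISH/Microsoft.Spark.Worker"
```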
+ + 2. Build the Samples + ```bash + cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/ + dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 + ``` +
+ 📙 Click to see sample console output + + ```bash + user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 + Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core Copyright (C) Microsoft Corporation. All rights reserved. - Restoring packages for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj... - Restore completed in 53 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj. - Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props. - Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets. - Restore completed in 305.72 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj. - Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll - Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll - Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/ - ``` - + Restore completed in 37.11 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj. + Restore completed in 281.63 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj. + Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll + Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll + Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/ + ``` +
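The Run Samples steps that follow rely on the published worker being discoverable. A minimal sketch of the setup on Ubuntu; the paths come from the build output above, while the jar name/version and the example arguments are illustrative rather than prescribed by this patch:

```bash
# Make the published worker discoverable via DOTNET_WORKER_DIR.
export DOTNET_WORKER_DIR=~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish

# Submit an example app. Substitute the jar produced by `mvn clean package`;
# the app name and data file below are illustrative.
cd ~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish
spark-submit \
  --class org.apache.spark.deploy.dotnet.DotnetRunner \
  --master local \
  microsoft-spark-2.4.x-<version>.jar \
  ./Microsoft.Spark.CSharp.Examples Sql.Basic people.json
```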
- 3. Manually copy Worker binaries into the Samples output location. - ``` - cp ~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/* ~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/ - ``` # Run Samples Once you build the samples, you can use `spark-submit` to submit your .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark. - 1. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`) - 2. Running your app follows the basic structure: + 1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`) + 2. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`) + 3. Running your app follows the basic structure: ```bash spark-submit \ [--jars ] \ diff --git a/docs/building/windows-instructions.md b/docs/building/windows-instructions.md index c1c3f080..8ea2b852 100644 --- a/docs/building/windows-instructions.md +++ b/docs/building/windows-instructions.md @@ -42,7 +42,7 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct - Verify you are able to run `spark-shell` from your command-line
📙 Click to see sample console output - + ``` Welcome to ____ __ @@ -58,26 +58,22 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct scala> sc res0: org.apache.spark.SparkContext = org.apache.spark.SparkContext@6eaa6b0c ``` - - Note: If you observe the following: - > ERROR Shell:397 - Failed to locate the winutils binary in the hadoop binary path - > java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries. - - You can ignore this if you are planning on running Spark in [Standalone mode](https://spark.apache.org/docs/latest/spark-standalone.html). If not, you would have to setup **[WinUtils](https://github.com/steveloughran/winutils)** - - - Download winutils.exe binary from [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2. - - Save winutils.exe binary to a directory of your choice, e.g. c:\hadoop\bin. - - Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using command-line: - ``` - set HADOOP_HOME=c:\hadoop - ``` - - Set PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using command-line: - ``` - set PATH=%HADOOP_HOME%\bin;%PATH% - ``` - +
+   6. Install **[WinUtils](https://github.com/steveloughran/winutils)**
+      - Download the `winutils.exe` binary from the [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g., use hadoop-2.7.1 for Spark 2.3.2.
+      - Save the `winutils.exe` binary to a directory of your choice, e.g., `c:\hadoop\bin`
+      - Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using the command-line:
+        ```powershell
+        set HADOOP_HOME=c:\hadoop
+        ```
+      - Set the `PATH` environment variable to include `%HADOOP_HOME%\bin`. For instance, using the command-line:
+        ```powershell
+        set PATH=%HADOOP_HOME%\bin;%PATH%
+        ```
+
 Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from your command-line before you move to the next section. Feel there is a better way? Please [open an issue](https://github.com/dotnet/spark/issues) and feel free to contribute.
 > **Note**: A new instance of the command-line may be required if any environment variables were updated.
@@ -86,7 +82,7 @@ Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from
 For the rest of the section, it is assumed that you have cloned Spark .NET repo into your machine e.g., `c:\github\dotnet-spark\`
-```
+```powershell
 git clone https://github.com/dotnet/spark.git c:\github\dotnet-spark
 ```
@@ -96,7 +92,7 @@ When you submit a .NET application, Spark .NET has the necessary logic written i
 Regardless of whether you are using .NET Framework or .NET Core, you will need to build the Spark .NET Scala extension layer. This is easy to do:
-```
+```powershell
 cd src\scala
 mvn clean package
 ```
@@ -129,8 +125,8 @@ You should see JARs created for the supported Spark versions:
📙 Click to see sample console output - ``` - Directory: C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\net461 + ```powershell + Directory: C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461 Mode LastWriteTime Length Name @@ -156,69 +152,55 @@ You should see JARs created for the supported Spark versions: > Note: We are currently working on automating .NET Core builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually. 1. Build the Worker - ``` - cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\ - dotnet publish -f netcoreapp2.1 -r win10-x64 - ``` -
- 📙 Click to see sample console output - - ``` - PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64 - Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core - Copyright (C) Microsoft Corporation. All rights reserved. - - Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj... - Restore completed in 37.29 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj. - Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.props. - Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.targets. - Restore completed in 230.49 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj. - Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll - Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll - Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\publish\ - ``` - -
- 2. Build the Samples + ```powershell + cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\ + dotnet publish -f netcoreapp2.1 -r win10-x64 ``` - cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\ - Get-Content .\Microsoft.Spark.CSharp.Examples.csproj | Where-Object {$_ -notmatch 'Microsoft.Spark.Worker.csproj'} | Set-Content .\Microsoft.Spark.CSharp.Examples.Patched.csproj - dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj - ``` - Note the creation of a new patched `.csproj` file. This is due to a bug in .NET Core CLI that causes problems with building a dependency project that creates executables and we are working with the .NET team towards resolving this. -
📙 Click to see sample console output - - ``` - PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj - Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core - Copyright (C) Microsoft Corporation. All rights reserved. - Restoring packages for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj... - Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj... - Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props. - Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark\obj\Microsoft.Spark.csproj.nuget.g.props. - Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets. - Restore completed in 208.34 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj. - Restore completed in 208.34 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj. - Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll - Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll - Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\publish\ - ``` - -
- 3. Manually copy Worker binaries into the Samples output location. - ``` - cp c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\* C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\ - ``` + ```powershell + PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64 + Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core + Copyright (C) Microsoft Corporation. All rights reserved. + + Restore completed in 299.95 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj. + Restore completed in 306.62 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj. + Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll + Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll + Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\ + ``` + +
+ 2. Build the Samples + ```powershell + cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\ + dotnet publish -f netcoreapp2.1 -r win10-x64 + ``` +
+ 📙 Click to see sample console output + + ```powershell + PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64 + Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core + Copyright (C) Microsoft Corporation. All rights reserved. + + Restore completed in 44.22 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj. + Restore completed in 336.94 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.csproj. + Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll + Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll + Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\ + ``` + +
# Run Samples Once you build the samples, running them will be through `spark-submit` regardless of whether you are targeting .NET Framework or .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark. - 1. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core) - 2. Running your app follows the basic structure: + 1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core) + 2. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core) + 3. Running your app follows the basic structure: ```powershell spark-submit.cmd ` [--jars ] ` diff --git a/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj b/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj index 6264a302..310dccb9 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj +++ b/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj @@ -9,7 +9,6 @@ - diff --git a/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj b/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj index 28956da7..e52ccae7 100644 --- a/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj +++ b/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj @@ -21,7 +21,6 @@ - diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs index bd5ad8ea..b310a817 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs @@ -1,44 +1,44 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.Spark.E2ETest.Utils; -using Xunit; - -namespace Microsoft.Spark.E2ETest.IpcTests -{ - [Collection("Spark E2E Tests")] - public class SparkContextTests - { - /// - /// Test signatures for APIs up to Spark 2.3.*. - /// - /// - /// For the RDD related tests, refer to . 
- /// - [Fact] - public void TestSignaturesV2_3_X() - { - SparkContext sc = SparkContext.GetOrCreate(new SparkConf()); - - _ = sc.GetConf(); - _ = sc.DefaultParallelism; - - sc.SetJobDescription("job description"); - - sc.SetJobGroup("group id", "description"); - sc.SetJobGroup("group id", "description", true); - - sc.ClearJobGroup(); - - string filePath = $"{TestEnvironment.ResourceDirectory}people.txt"; - sc.AddFile(filePath); - sc.AddFile(filePath, true); - +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.Spark.E2ETest.Utils; +using Xunit; + +namespace Microsoft.Spark.E2ETest.IpcTests +{ + [Collection("Spark E2E Tests")] + public class SparkContextTests + { + /// + /// Test signatures for APIs up to Spark 2.3.*. + /// + /// + /// For the RDD related tests, refer to . + /// + [Fact] + public void TestSignaturesV2_3_X() + { + SparkContext sc = SparkContext.GetOrCreate(new SparkConf()); + + _ = sc.GetConf(); + _ = sc.DefaultParallelism; + + sc.SetJobDescription("job description"); + + sc.SetJobGroup("group id", "description"); + sc.SetJobGroup("group id", "description", true); + + sc.ClearJobGroup(); + + string filePath = $"{TestEnvironment.ResourceDirectory}people.txt"; + sc.AddFile(filePath); + sc.AddFile(filePath, true); + using (var tempDir = new TemporaryDirectory()) { sc.SetCheckpointDir(TestEnvironment.ResourceDirectory); - } - } - } -} + } + } + } +} diff --git a/src/csharp/Microsoft.Spark.E2ETest/Utils/TemporaryDirectory.cs b/src/csharp/Microsoft.Spark.E2ETest/Utils/TemporaryDirectory.cs index 399e6c67..556b78f9 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/Utils/TemporaryDirectory.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/Utils/TemporaryDirectory.cs @@ -1,63 +1,63 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.IO; - -namespace Microsoft.Spark.E2ETest.Utils -{ - /// - /// Creates a temporary folder that is automatically cleaned up when disposed. - /// - internal sealed class TemporaryDirectory : IDisposable - { - private bool disposed = false; - - /// - /// Path to temporary folder. - /// - public string Path { get; } - +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; + +namespace Microsoft.Spark.E2ETest.Utils +{ + /// + /// Creates a temporary folder that is automatically cleaned up when disposed. + /// + internal sealed class TemporaryDirectory : IDisposable + { + private bool disposed = false; + + /// + /// Path to temporary folder. 
+ /// + public string Path { get; } + public TemporaryDirectory() { Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString()); - Cleanup(); - Directory.CreateDirectory(Path); - Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}"; - } - - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - private void Cleanup() - { - if (File.Exists(Path)) - { - File.Delete(Path); - } - else if (Directory.Exists(Path)) - { - Directory.Delete(Path, true); - } - } - - private void Dispose(bool disposing) - { + Cleanup(); + Directory.CreateDirectory(Path); + Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}"; + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + private void Cleanup() + { + if (File.Exists(Path)) + { + File.Delete(Path); + } + else if (Directory.Exists(Path)) + { + Directory.Delete(Path, true); + } + } + + private void Dispose(bool disposing) + { if (disposed) - { + { return; - } - - if (disposing) - { - Cleanup(); - } - - disposed = true; - } - } -} + } + + if (disposing) + { + Cleanup(); + } + + disposed = true; + } + } +} diff --git a/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs b/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs index c5a20796..2475d8ba 100644 --- a/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs +++ b/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs @@ -33,10 +33,6 @@ namespace Microsoft.Spark.Services private string _workerPath; - // Note that the following is only for the backward compatibility and - // will be removed after the next release. - private const string WorkerPathSettingKey = "DotnetWorkerPath"; - /// /// Returns the port number for socket communication between JVM and CLR. /// @@ -66,10 +62,7 @@ namespace Microsoft.Spark.Services return _workerPath; } - // Note that the "WorkerPathSettingKey" is only for the backward compatibility - // will be removed after the next release. - string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName) ?? - Environment.GetEnvironmentVariable(WorkerPathSettingKey); + string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName); // If the WorkerDirEnvName environment variable is set, the worker path is constructed // based on it. @@ -80,17 +73,6 @@ namespace Microsoft.Spark.Services return _workerPath; } - // If the WorkerDirEnvName environment variable is not set, the worker path is - // constructed based on the current assembly's directory. This requires the worker - // executable is present. - workerDir = Path.GetDirectoryName(GetType().Assembly.Location); - _workerPath = Path.Combine(workerDir, s_procFileName); - if (File.Exists(_workerPath)) - { - _logger.LogDebug($"Using the current assembly path to construct .NET worker path: {_workerPath}."); - return _workerPath; - } - // Otherwise, the worker exectuable name is returned meaning it should be PATH. _workerPath = s_procFileName; return _workerPath;
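To summarize the `ConfigurationService` change above: the worker path now resolves from the `DOTNET_WORKER_DIR` environment variable and otherwise falls back to locating the `Microsoft.Spark.Worker` executable on `PATH`; the backward-compat `DotnetWorkerPath` variable and the assembly-directory probe are removed. The surviving setups, sketched in bash (the directory below is illustrative):

```bash
# Option 1: point DOTNET_WORKER_DIR at the worker's publish directory.
export DOTNET_WORKER_DIR=/opt/microsoft-spark-worker

# Option 2: omit DOTNET_WORKER_DIR and expose the worker on PATH instead.
export PATH="/opt/microsoft-spark-worker:$PATH"

# No longer honored after this change:
# export DotnetWorkerPath=/opt/microsoft-spark-worker  # removed compat key
# (Dropping the worker next to Microsoft.Spark.dll no longer works either.)
```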