Update the logic for Microsoft.Spark.Worker path discovery (#134)

This commit is contained in:
Steve Suh 2019-06-13 16:04:53 -07:00 committed by Terry Kim
Parent c3a65a24ee
Commit f1c5b86d84
11 changed files with 202 additions and 271 deletions

View file

@@ -22,10 +22,10 @@ Follow the instructions for registration and download the tool to local disk wit
dbgen.exe -vf -s 300
```
*Note*: Since there is no parallelization option for TPC-H dbgen, generating a 300GB dataset could take up to 40 hours to complete.
- After database population generation is completed, there should be 8 tables (customer, lineitem, nation, orders, part, partsupp, region, supplier) created with the .tbl extension.
4. Convert TPC-H dataset to parquet format.
4. Convert TPC-H dataset to parquet format.
- You can use a simple Spark [application](https://github.com/dotnet/spark/blob/master/benchmark/scala/src/main/scala/com/microsoft/tpch/ConvertTpchCsvToParquetApp.scala) to convert the TPC-H dataset to parquet format. You can run the following spark-submit command to submit the application, you can also adjust it according to format of [submitting application](https://spark.apache.org/docs/latest/submitting-applications.html).
```
<spark-submit> --master local[*] --class com.microsoft.tpch.ConvertTpchCsvToParquetApp microsoft-spark-benchmark-<version>.jar <path-to-source-directory-with-TPCH-tables> <path-to-destination-directory-to-save-parquet-file>

View file

@@ -10,7 +10,6 @@
<ItemGroup>
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Experimental\Microsoft.Spark.Experimental.csproj" />
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
<ProjectReference Include="..\..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
</ItemGroup>

View file

@@ -9,8 +9,8 @@ CSHARP_DLL=$6
JAR_PATH=$7
CSHARP_EXECUTABLE=$8
DATA_PATH=$9
NUM_ITERATION=$10
IS_SQL=$11
NUM_ITERATION=${10}
IS_SQL=${11}
for i in {1..22}
do

View file

@@ -48,7 +48,7 @@ IFS='-' read -ra BASE_FILENAME <<< "$(basename $SRC_WORKER_PATH_OR_URI .tar.gz)"
VERSION=${BASE_FILENAME[2]}
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
[[ ${#VERSION[@]} != 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
[[ ${#VERSION_CHECK[@]} == 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; }
# Path of the final destination for the worker binaries
# (the one we just downloaded and extracted)

View file

@@ -119,86 +119,56 @@ You should see JARs created for the supported Spark versions:
## Building .NET Sample Applications using .NET Core CLI
1. Build the Worker
```bash
cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
```
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```
user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
Welcome to .NET Core!
---------------------
Learn more about .NET Core: https://aka.ms/dotnet-docs
Use 'dotnet --help' to see available commands or visit: https://aka.ms/dotnet-cli-docs
...
output omitted
...
Restore completed in 20.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
Installing runtime.linux-x64.Microsoft.NETCore.DotNetAppHost 2.1.9.
Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostResolver 2.1.9.
Installing runtime.linux-x64.Microsoft.NETCore.DotNetHostPolicy 2.1.9.
Installing runtime.linux-x64.Microsoft.NETCore.App 2.1.9.
Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.props.
Generating MSBuild file /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/obj/Microsoft.Spark.Worker.csproj.nuget.g.targets.
Restore completed in 37.09 sec for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
Microsoft.Spark.Worker -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
```
</details>
2. Build the Samples
**.NET Core 2.1.x**
Due to a bug in .NET Core 2.1.x CLI that causes problems with building a dependency project that creates executables, we have to resort to modifying the `.csproj` file. We are working with the .NET team towards resolving this.
```bash
cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
```
cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
cat Microsoft.Spark.CSharp.Examples.csproj | grep -v "Microsoft.Spark.Worker.csproj" > Microsoft.Spark.CSharp.Examples.Patched.csproj
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
```
**.NET Core 3.x**
If you are using .NET Core 3.x, you can avoid creating a new patched `.csproj` file and instead compile the project directly:
```
cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.csproj
```
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```bash
user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
Restore completed in 36.03 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj.
Restore completed in 35.94 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.Worker.dll
Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
```
user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64 Microsoft.Spark.CSharp.Examples.Patched.csproj
Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
</details>
2. Build the Samples
```bash
cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/
dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
```
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```bash
user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f netcoreapp2.1 -r ubuntu.18.04-x64
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
Restoring packages for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj...
Restore completed in 53 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
Generating MSBuild file /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/obj/Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
Restore completed in 305.72 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.Patched.csproj.
Microsoft.Spark -> /home/user/dotnet.spark/src/csharp/Microsoft.Spark/bin/Debug/netstandard2.0/Microsoft.Spark.dll
Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
Microsoft.Spark.CSharp.Examples.Patched -> /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/bin/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
```
Restore completed in 37.11 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj.
Restore completed in 281.63 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj.
Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll
Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/Microsoft.Spark.CSharp.Examples.dll
Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
```
</details>
3. Manually copy Worker binaries into the Samples output location.
```
cp ~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/* ~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish/
```
# Run Samples
Once you build the samples, you can use `spark-submit` to submit your .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
1. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
2. Running your app follows the basic structure:
1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
2. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/netcoreapp2.1/ubuntu.18.04-x64/publish`)
3. Running your app follows the basic structure:
```bash
spark-submit \
[--jars <any-jars-your-app-is-dependent-on>] \

View file

@@ -42,7 +42,7 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
- Verify you are able to run `spark-shell` from your command-line
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```
Welcome to
____ __
@@ -58,26 +58,22 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
scala> sc
res0: org.apache.spark.SparkContext = org.apache.spark.SparkContext@6eaa6b0c
```
Note: If you observe the following:
> ERROR Shell:397 - Failed to locate the winutils binary in the hadoop binary path
> java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
You can ignore this if you are planning on running Spark in [Standalone mode](https://spark.apache.org/docs/latest/spark-standalone.html). If not, you would have to setup **[WinUtils](https://github.com/steveloughran/winutils)**
- Download winutils.exe binary from [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
- Save winutils.exe binary to a directory of your choice, e.g. c:\hadoop\bin.
- Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using command-line:
```
set HADOOP_HOME=c:\hadoop
```
- Set PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using command-line:
```
set PATH=%HADOOP_HOME%\bin;%PATH%
```
</details>
6. Install **[WinUtils](https://github.com/steveloughran/winutils)**
- Download `winutils.exe` binary from [WinUtils repository](https://github.com/steveloughran/winutils). You should select the version of Hadoop the Spark distribution was compiled with, e.g. use hadoop-2.7.1 for Spark 2.3.2.
- Save `winutils.exe` binary to a directory of your choice e.g., `c:\hadoop\bin`
- Set `HADOOP_HOME` to reflect the directory with winutils.exe (without bin). For instance, using command-line:
```powershell
set HADOOP_HOME=c:\hadoop
```
- Set PATH environment variable to include `%HADOOP_HOME%\bin`. For instance, using command-line:
```powershell
set PATH=%HADOOP_HOME%\bin;%PATH%
```
Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from your command-line before you move to the next section. Feel there is a better way? Please [open an issue](https://github.com/dotnet/spark/issues) and feel free to contribute.
> **Note**: A new instance of the command-line may be required if any environment variables were updated.
@@ -86,7 +82,7 @@ Please make sure you are able to run `dotnet`, `java`, `mvn`, `spark-shell` from
For the rest of the section, it is assumed that you have cloned Spark .NET repo into your machine e.g., `c:\github\dotnet-spark\`
```
```powershell
git clone https://github.com/dotnet/spark.git c:\github\dotnet-spark
```
@@ -96,7 +92,7 @@ When you submit a .NET application, Spark .NET has the necessary logic written i
Regardless of whether you are using .NET Framework or .NET Core, you will need to build the Spark .NET Scala extension layer. This is easy to do:
```
```powershell
cd src\scala
mvn clean package
```
@@ -129,8 +125,8 @@ You should see JARs created for the supported Spark versions:
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```
Directory: C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\net461
```powershell
Directory: C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461
Mode LastWriteTime Length Name
@@ -156,69 +152,55 @@ You should see JARs created for the supported Spark versions:
> Note: We are currently working on automating .NET Core builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually.
1. Build the Worker
```
cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\
dotnet publish -f netcoreapp2.1 -r win10-x64
```
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```
PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj...
Restore completed in 37.29 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.props.
Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\obj\Microsoft.Spark.Worker.csproj.nuget.g.targets.
Restore completed in 230.49 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
Microsoft.Spark.Worker -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\bin\Debug\netcoreapp2.1\win10-x64\publish\
```
</details>
2. Build the Samples
```powershell
cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\
dotnet publish -f netcoreapp2.1 -r win10-x64
```
cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\
Get-Content .\Microsoft.Spark.CSharp.Examples.csproj | Where-Object {$_ -notmatch 'Microsoft.Spark.Worker.csproj'} | Set-Content .\Microsoft.Spark.CSharp.Examples.Patched.csproj
dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
```
Note the creation of a new patched `.csproj` file. This is due to a bug in .NET Core CLI that causes problems with building a dependency project that creates executables and we are working with the .NET team towards resolving this.
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```
PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64 .\Microsoft.Spark.CSharp.Examples.Patched.csproj
Microsoft (R) Build Engine version 15.9.20+g88f5fadfbe for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
Restoring packages for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj...
Restoring packages for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj...
Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.props.
Generating MSBuild file C:\github\dotnet-spark\src\csharp\Microsoft.Spark\obj\Microsoft.Spark.csproj.nuget.g.props.
Generating MSBuild file C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\obj\Microsoft.Spark.CSharp.Examples.Patched.csproj.nuget.g.targets.
Restore completed in 208.34 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.Patched.csproj.
Restore completed in 208.34 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
Microsoft.Spark -> C:\github\dotnet-spark\src\csharp\Microsoft.Spark\bin\Debug\netstandard2.0\Microsoft.Spark.dll
Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
Microsoft.Spark.CSharp.Examples.Patched -> C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\bin\Debug\netcoreapp2.1\win10-x64\publish\
```
</details>
3. Manually copy Worker binaries into the Samples output location.
```
cp c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\* C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
```
```powershell
PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f netcoreapp2.1 -r win10-x64
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
Restore completed in 299.95 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
Restore completed in 306.62 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj.
Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.Worker.dll
Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish\
```
</details>
2. Build the Samples
```powershell
cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\
dotnet publish -f netcoreapp2.1 -r win10-x64
```
<details>
<summary>&#x1F4D9; Click to see sample console output</summary>
```powershell
PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f netcoreapp2.1 -r win10-x64
Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
Restore completed in 44.22 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj.
Restore completed in 336.94 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.csproj.
Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll
Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\Microsoft.Spark.CSharp.Examples.dll
Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish\
```
</details>
# Run Samples
Once you build the samples, running them will be through `spark-submit` regardless of whether you are targeting .NET Framework or .NET Core apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark.
1. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
2. Running your app follows the basic structure:
1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
2. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\netcoreapp2.1\win10-x64\publish` for .NET Core)
3. Running your app follows the basic structure:
```powershell
spark-submit.cmd `
[--jars <any-jars-your-app-is-dependent-on>] `

View file

@@ -9,7 +9,6 @@
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
</ItemGroup>

View file

@@ -21,7 +21,6 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" />
<ProjectReference Include="..\..\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj" />
</ItemGroup>

View file

@@ -1,44 +1,44 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Microsoft.Spark.E2ETest.Utils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests
{
[Collection("Spark E2E Tests")]
public class SparkContextTests
{
/// <summary>
/// Test signatures for APIs up to Spark 2.3.*.
/// </summary>
/// <remarks>
/// For the RDD related tests, refer to <see cref="RDDTests"/>.
/// </remarks>
[Fact]
public void TestSignaturesV2_3_X()
{
SparkContext sc = SparkContext.GetOrCreate(new SparkConf());
_ = sc.GetConf();
_ = sc.DefaultParallelism;
sc.SetJobDescription("job description");
sc.SetJobGroup("group id", "description");
sc.SetJobGroup("group id", "description", true);
sc.ClearJobGroup();
string filePath = $"{TestEnvironment.ResourceDirectory}people.txt";
sc.AddFile(filePath);
sc.AddFile(filePath, true);
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Microsoft.Spark.E2ETest.Utils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests
{
[Collection("Spark E2E Tests")]
public class SparkContextTests
{
/// <summary>
/// Test signatures for APIs up to Spark 2.3.*.
/// </summary>
/// <remarks>
/// For the RDD related tests, refer to <see cref="RDDTests"/>.
/// </remarks>
[Fact]
public void TestSignaturesV2_3_X()
{
SparkContext sc = SparkContext.GetOrCreate(new SparkConf());
_ = sc.GetConf();
_ = sc.DefaultParallelism;
sc.SetJobDescription("job description");
sc.SetJobGroup("group id", "description");
sc.SetJobGroup("group id", "description", true);
sc.ClearJobGroup();
string filePath = $"{TestEnvironment.ResourceDirectory}people.txt";
sc.AddFile(filePath);
sc.AddFile(filePath, true);
using (var tempDir = new TemporaryDirectory())
{
sc.SetCheckpointDir(TestEnvironment.ResourceDirectory);
}
}
}
}
}
}
}
}

View file

@@ -1,63 +1,63 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.IO;
namespace Microsoft.Spark.E2ETest.Utils
{
/// <summary>
/// Creates a temporary folder that is automatically cleaned up when disposed.
/// </summary>
internal sealed class TemporaryDirectory : IDisposable
{
private bool disposed = false;
/// <summary>
/// Path to temporary folder.
/// </summary>
public string Path { get; }
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.IO;
namespace Microsoft.Spark.E2ETest.Utils
{
/// <summary>
/// Creates a temporary folder that is automatically cleaned up when disposed.
/// </summary>
internal sealed class TemporaryDirectory : IDisposable
{
private bool disposed = false;
/// <summary>
/// Path to temporary folder.
/// </summary>
public string Path { get; }
public TemporaryDirectory()
{
Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString());
Cleanup();
Directory.CreateDirectory(Path);
Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}";
}
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
private void Cleanup()
{
if (File.Exists(Path))
{
File.Delete(Path);
}
else if (Directory.Exists(Path))
{
Directory.Delete(Path, true);
}
}
private void Dispose(bool disposing)
{
Cleanup();
Directory.CreateDirectory(Path);
Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}";
}
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
private void Cleanup()
{
if (File.Exists(Path))
{
File.Delete(Path);
}
else if (Directory.Exists(Path))
{
Directory.Delete(Path, true);
}
}
private void Dispose(bool disposing)
{
if (disposed)
{
{
return;
}
if (disposing)
{
Cleanup();
}
disposed = true;
}
}
}
}
if (disposing)
{
Cleanup();
}
disposed = true;
}
}
}

View file

@@ -33,10 +33,6 @@ namespace Microsoft.Spark.Services
private string _workerPath;
// Note that the following is only for the backward compatibility and
// will be removed after the next release.
private const string WorkerPathSettingKey = "DotnetWorkerPath";
/// <summary>
/// Returns the port number for socket communication between JVM and CLR.
/// </summary>
@@ -66,10 +62,7 @@ namespace Microsoft.Spark.Services
return _workerPath;
}
// Note that the "WorkerPathSettingKey" is only for the backward compatibility
// will be removed after the next release.
string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName) ??
Environment.GetEnvironmentVariable(WorkerPathSettingKey);
string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName);
// If the WorkerDirEnvName environment variable is set, the worker path is constructed
// based on it.
@@ -80,17 +73,6 @@ namespace Microsoft.Spark.Services
return _workerPath;
}
// If the WorkerDirEnvName environment variable is not set, the worker path is
// constructed based on the current assembly's directory. This requires the worker
// executable is present.
workerDir = Path.GetDirectoryName(GetType().Assembly.Location);
_workerPath = Path.Combine(workerDir, s_procFileName);
if (File.Exists(_workerPath))
{
_logger.LogDebug($"Using the current assembly path to construct .NET worker path: {_workerPath}.");
return _workerPath;
}
// Otherwise, the worker exectuable name is returned meaning it should be PATH.
_workerPath = s_procFileName;
return _workerPath;