Merge pull request #26 from Microsoft/master

merge from main
Renyi Xiong 2016-05-12 12:12:07 -07:00
Parents e686eb6b2b 5b86ee546a
Commit 82ea61c8d9
39 changed files with 418 additions and 189 deletions

View file

@ -127,7 +127,7 @@ Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsof
Mobius releases are available at https://github.com/Microsoft/Mobius/releases. References needed to build C# Spark driver applications using Mobius are also available in [NuGet](https://www.nuget.org/packages/Microsoft.SparkCLR)
[![Nuget](https://img.shields.io/badge/nuget-package-00BFFF.svg)](https://www.nuget.org/packages/Microsoft.SparkCLR)
[![NuGet Badge](https://buildstats.info/nuget/Microsoft.SparkCLR)](https://www.nuget.org/packages/Microsoft.SparkCLR)
Refer to [mobius-release-info.md](notes/mobius-release-info.md) for the details on versioning policy and the contents of the release.
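
As an illustration, the package can be pulled into a project with the NuGet command line; this is a hedged example and the exact version to reference depends on the Mobius release in use:
```
nuget install Microsoft.SparkCLR -Prerelease
```
In Visual Studio the same reference ends up in the project's packages.config file.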

View file

@ -46,7 +46,7 @@ after_test:
- pip install codecov
- codecov -f "SparkCLRCodeCoverage.xml"
- cmd: cd .\build\localmode
- cmd: .\Runsamples.cmd --validate
- cmd: if not defined ProjectVersion (.\Runsamples.cmd --validate)
- cmd: cd ..\..
- cmd: dir csharp\Microsoft*.nupkg
- cmd: dir scala\target\spark-clr*.jar
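
The `if not defined ProjectVersion` guard above makes `Runsamples.cmd --validate` run only when the `ProjectVersion` environment variable is not set. A minimal standalone sketch of the same cmd pattern (illustrative only, not part of the AppVeyor configuration):
```
@rem illustrative sketch of the conditional validation step
if not defined ProjectVersion (
    cd .\build\localmode
    .\Runsamples.cmd --validate
    cd ..\..
)
```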

View file

@ -145,7 +145,7 @@ call Clean.cmd
call Build.cmd
if %ERRORLEVEL% NEQ 0 (
@echo Build Mobius C# examples failed, stop building.
@echo Build Mobius .NET examples failed, stop building.
popd
goto :eof
)
@ -195,10 +195,21 @@ if not defined ProjectVersion (
)
set SPARKCLR_NAME=spark-clr_2.10-%ProjectVersion%
@echo "%SPARKCLR_HOME%
@rem copy samples to top-level folder before zipping
@echo move /Y "%SPARKCLR_HOME%\samples "%CMDHOME%"
move /Y %SPARKCLR_HOME%\samples %CMDHOME%
@echo move /Y "%SPARKCLR_HOME%\data" "%CMDHOME%\samples"
move /Y %SPARKCLR_HOME%\data %CMDHOME%\samples
@rem copy release info
@echo copy /Y "%CMDHOME%\..\notes\mobius-release-info.md"
copy /Y "%CMDHOME%\..\notes\mobius-release-info.md"
@rem Create the zip file
@echo 7z a .\target\%SPARKCLR_NAME%.zip runtime localmode examples
7z a .\target\%SPARKCLR_NAME%.zip runtime localmode examples
@echo 7z a .\target\%SPARKCLR_NAME%.zip runtime examples samples mobius-release-info.md
7z a .\target\%SPARKCLR_NAME%.zip runtime examples samples mobius-release-info.md
:distdone
popd

View file

@ -86,7 +86,7 @@ pushd "$FWDIR/../examples"
if [ $? -ne 0 ];
then
echo "Build Mobius C# Examples failed, stop building."
echo "Build Mobius .NET Examples failed, stop building."
popd
exit 1
fi

View file

@ -45,12 +45,10 @@ set SPARK_VERSION=1.6.1
set HADOOP_VERSION=2.6
@echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION%
@rem Windows 7/8/10 may not allow powershell scripts by default
powershell -Command Set-ExecutionPolicy -Scope CurrentUser -ExecutionPolicy Unrestricted
@rem download runtime dependencies
pushd "%CMDHOME%"
powershell -f downloadtools.ps1 run !VERBOSE!
@rem Windows 7/8/10 may not allow powershell scripts by default
powershell -ExecutionPolicy Unrestricted -File downloadtools.ps1 run !VERBOSE!
@echo [RunSamples.cmd] UpdateRuntime.cmd
type ..\tools\updateruntime.cmd
call ..\tools\updateruntime.cmd

View file

@ -282,16 +282,34 @@ function Download-BuildTools
function Download-ExternalDependencies
{
# Downloading spark-csv package and its dependency. These packages are required for DataFrame operations in Mobius
$readMeStream = [System.IO.StreamWriter] "$scriptDir\..\dependencies\ReadMe.txt"
$readMeStream.WriteLine("The files in this folder are dependencies of Mobius Project")
$readMeStream.WriteLine("Refer to the following download locations for details on the jars like POM file, license etc.")
$readMeStream.WriteLine("")
$readMeStream.WriteLine("------------ Dependencies for CSV parsing in Mobius DataFrame API -----------------------------")
# Downloading spark-csv package and its dependency. These packages are required for DataFrame operations in Mobius
$url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.3.0/spark-csv_2.10-1.3.0.jar"
$output="$scriptDir\..\dependencies\spark-csv_2.10-1.3.0.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")
$url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.1/commons-csv-1.1.jar"
$output="$scriptDir\..\dependencies\commons-csv-1.1.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")
$readMeStream.WriteLine("")
$readMeStream.WriteLine("------------ Dependencies for Kafka-based processing in Mobius Streaming API -----------------------------")
$url = "http://search.maven.org/remotecontent?filepath=org/apache/spark/spark-streaming-kafka-assembly_2.10/1.6.1/spark-streaming-kafka-assembly_2.10-1.6.1.jar"
$output="$scriptDir\..\dependencies\spark-streaming-kafka-assembly_2.10-1.6.1.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")
$readMeStream.close()
return
}
@ -355,7 +373,7 @@ function Download-RuntimeDependencies
$winutilsExe = "$winutilsBin\winutils.exe"
if (!(test-path $winutilsExe))
{
$url = "http://public-repo-1.hortonworks.com/hdp-win-alpha/winutils.exe"
$url = "https://github.com/MobiusForSpark/winutils/blob/master/hadoop-2.6.0/bin/winutils.exe?raw=true"
$output=$winutilsExe
Download-File $url $output
}

View file

@ -1,17 +1,17 @@
<?xml version="1.0"?>
<?xml version="1.0"?>
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata>
<id>Microsoft.SparkCLR</id>
<version>1.6.1-SNAPSHOT</version>
<version>1.6.101-SNAPSHOT</version>
<authors>Microsoft Corporation</authors>
<owners>Microsoft Corporation</owners>
<licenseUrl>https://github.com/Microsoft/SparkCLR/blob/master/LICENSE</licenseUrl>
<projectUrl>https://github.com/Microsoft/SparkCLR</projectUrl>
<licenseUrl>https://github.com/Microsoft/Mobius/blob/master/LICENSE</licenseUrl>
<projectUrl>https://github.com/Microsoft/Mobius</projectUrl>
<requireLicenseAcceptance>true</requireLicenseAcceptance>
<description>C# language binding and extensions to Apache Spark</description>
<releaseNotes>See release notes on GitHub https://github.com/Microsoft/SparkCLR/releases</releaseNotes>
<releaseNotes>See release notes on GitHub https://github.com/Microsoft/Mobius/releases</releaseNotes>
<copyright>Copyright (c) Microsoft. All rights reserved. Licensed under the MIT license. See LICENSE file in the project root for full license information.</copyright>
<tags>C# .NET Apache Spark RDD DataFrame</tags>
<tags>C# .NET Apache Spark RDD DataFrame Streaming SparkCLR Mobius</tags>
<dependencies>
<dependency id="Razorvine.Pyrolite" version="4.10.0" />
<dependency id="Razorvine.Serpent" version="1.12.0" />

View file

@ -4,7 +4,7 @@ export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export XBUILDOPT=/verbosity:minimal
if [ $builduri = "" ];
if [ -z $builduri ];
then
export builduri=build.sh
fi
@ -15,7 +15,7 @@ export PROJ="$FWDIR/$PROJ_NAME.sln"
echo "===== Building $PROJ ====="
function error_exit() {
if [ "$STEP" = "" ];
if [ -z $STEP ];
then
export STEP=$CONFIGURATION
fi

View file

@ -34,7 +34,7 @@
<ItemGroup>
<Reference Include="CSharpWorker, Version=1.5.2.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
</Reference>
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -42,7 +42,7 @@
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=1.6.1.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
</Reference>
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -67,7 +67,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe.config">
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe.config">
<Link>CSharpWorker.exe.config</Link>
</None>
<None Include="App.config" />

View file

@ -4,5 +4,5 @@
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
</packages>

View file

@ -0,0 +1,45 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<!--*************************************************************************************-->
<!-- Log4Net is used by the example -->
<!--**************************************************************************************-->
<configSections>
<section name="log4net" type="log4net.Config.Log4NetConfigurationSectionHandler, log4net" />
</configSections>
<log4net>
<root>
<level value="DEBUG" />
<appender-ref ref="ConsoleAppender" />
<!--<appender-ref ref="LogFileAppender" />-->
</root>
<appender name="ConsoleAppender" type="log4net.Appender.ConsoleAppender">
<layout type="log4net.Layout.PatternLayout">
<conversionPattern value="[%date] [%thread] [%-5level] [%logger] - %message%newline" />
</layout>
</appender>
<appender name="LogFileAppender" type="log4net.Appender.RollingFileAppender">
<file type="log4net.Util.PatternString">
<conversionPattern value="%env{TEMP}\\SparkCLRLogs\\SparkCLR-WordCount_%env{COMPUTERNAME}[%processid].log" />
</file>
<param name="AppendToFile" value="true" />
<param name="MaxSizeRollBackups" value="2000" />
<param name="MaxFileSize" value="51200000" />
<param name="StaticLogFileName" value="false" />
<param name="DatePattern" value=".yyyy_MM_dd_hh" />
<param name="RollingStyle" value="Composite" />
<layout type="log4net.Layout.PatternLayout">
<conversionPattern value="[%date] [%thread] [%-5level] [%logger] - %message%newline" />
</layout>
</appender>
</log4net>
<appSettings>
<!--********************************************************************************************************-->
<!--** Uncomment the following settings to run Spark driver executable in **local** or **debug** modes ** -->
<!--** In debug mode, the driver is not launched by CSharpRunner but launched from VS or command prompt not configured for SparkCLR ** -->
<!--** CSharpBackend should be launched in debug mode as well and the port number from that should be used below ** -->
<!--** Command to launch CSharpBackend in debug mode is "sparkclr-submit.cmd debug" ** -->
<!--********************************************************************************************************-->
<!--<add key="CSharpWorkerPath" value="path.to.CSharpWorker.exe"/>
<add key="CSharpBackendPortNumber" value="11097"/>-->
</appSettings>
</configuration>

View file

@ -0,0 +1,41 @@
namespace WordCountFSharp.AssemblyInfo
open System.Reflection
open System.Runtime.CompilerServices
open System.Runtime.InteropServices
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[<assembly: AssemblyTitle("WordCountFSharp")>]
[<assembly: AssemblyDescription("")>]
[<assembly: AssemblyConfiguration("")>]
[<assembly: AssemblyCompany("")>]
[<assembly: AssemblyProduct("WordCountFSharp")>]
[<assembly: AssemblyCopyright("Copyright © 2016")>]
[<assembly: AssemblyTrademark("")>]
[<assembly: AssemblyCulture("")>]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[<assembly: ComVisible(false)>]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[<assembly: Guid("17b63d32-efc8-4ef5-831a-197a4fc29f06")>]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [<assembly: AssemblyVersion("1.0.*")>]
[<assembly: AssemblyVersion("1.0.0.0")>]
[<assembly: AssemblyFileVersion("1.0.0.0")>]
do
()

View file

@ -0,0 +1,35 @@
open Microsoft.Spark.CSharp.Core
open Microsoft.Spark.CSharp.Services
open System.Reflection
open System.Collections.Generic
[<EntryPoint>]
let main args =
match args with
| [| filePath |] ->
let logger =
LoggerServiceFactory.SetLoggerService Log4NetLoggerService.Instance
LoggerServiceFactory.GetLogger (MethodInfo.GetCurrentMethod().DeclaringType)
let sparkContext = SparkContext(SparkConf().SetAppName "MobiusWordCount")
logger.LogInfo (sprintf "Reading from file %s" filePath)
try
let lines = sparkContext.TextFile filePath
let counts =
lines.FlatMap(fun x -> x.Split ' ' :> _)
.Map(fun w -> KeyValuePair(w, 1))
.ReduceByKey(fun x y -> x + y)
.Collect()
for count in counts do
printfn "%s: %d" count.Key count.Value
with
| ex ->
logger.LogError "Error performing Word Count"
logger.LogException ex
sparkContext.Stop()
0
| _ ->
printfn "Usage: WordCount <file>"
1

View file

@ -0,0 +1,111 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>17b63d32-efc8-4ef5-831a-197a4fc29f06</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>WordCountFSharp</RootNamespace>
<AssemblyName>WordCountFSharp</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<TargetFSharpCoreVersion>4.4.0.0</TargetFSharpCoreVersion>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Name>WordCountFSharp</Name>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<Tailcalls>false</Tailcalls>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<WarningLevel>3</WarningLevel>
<DocumentationFile>bin\Debug\WordCountFSharp.XML</DocumentationFile>
<StartArguments>
</StartArguments>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<Tailcalls>true</Tailcalls>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<WarningLevel>3</WarningLevel>
<DocumentationFile>bin\Release\WordCountFSharp.XML</DocumentationFile>
</PropertyGroup>
<PropertyGroup>
<MinimumVisualStudioVersion Condition="'$(MinimumVisualStudioVersion)' == ''">11</MinimumVisualStudioVersion>
</PropertyGroup>
<Choose>
<When Condition="'$(VisualStudioVersion)' != '11.0' and Exists('$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets')">
<PropertyGroup>
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets</FSharpTargetsPath>
</PropertyGroup>
</When>
<When Condition="Exists('$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.1\Framework\v4.0\Microsoft.FSharp.Targets')">
<PropertyGroup>
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.1\Framework\v4.0\Microsoft.FSharp.Targets</FSharpTargetsPath>
</PropertyGroup>
</When>
<When Condition="Exists('$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets')">
<PropertyGroup>
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets</FSharpTargetsPath>
</PropertyGroup>
</When>
<When Condition="Exists('$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\4.0\Framework\v4.0\Microsoft.FSharp.Targets')">
<PropertyGroup>
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\4.0\Framework\v4.0\Microsoft.FSharp.Targets</FSharpTargetsPath>
</PropertyGroup>
</When>
</Choose>
<Import Project="$(FSharpTargetsPath)" />
<ItemGroup>
<Compile Include="AssemblyInfo.fs" />
<Compile Include="WordCountExample.fs" />
<Content Include="App.config" />
<Content Include="packages.config" />
</ItemGroup>
<ItemGroup>
<Reference Include="CSharpWorker">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="FSharp.Core">
<HintPath>..\..\packages\FSharp.Core.4.0.0.1\lib\net40\FSharp.Core.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="log4net">
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="mscorlib" />
<Reference Include="Newtonsoft.Json">
<HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="Razorvine.Pyrolite">
<HintPath>..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="Razorvine.Serpent">
<HintPath>..\..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Numerics" />
</ItemGroup>
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

View file

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="FSharp.Core" version="4.0.0.1" targetFramework="net45" />
<package id="log4net" version="2.0.5" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
</packages>

View file

@ -37,7 +37,7 @@
<ItemGroup>
<Reference Include="CSharpWorker, Version=1.5.2.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
</Reference>
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -45,7 +45,7 @@
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=1.6.1.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
</Reference>
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -66,7 +66,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe.config">
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe.config">
<Link>CSharpWorker.exe.config</Link>
</None>
<None Include="App.config">

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="log4net" version="2.0.5" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />

View file

@ -1,4 +1,4 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2013
VisualStudioVersion = 12.0.30501.0
@ -17,10 +17,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EventHub", "Streaming\Event
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Batch", "Batch", "{AE001E84-471E-4D02-BDDE-40B85915CEAE}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Pi", "Batch\Pi\Pi.csproj", "{913E6A56-9839-4379-8B3C-855BA9341663}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Pi", "Batch\pi\Pi.csproj", "{913E6A56-9839-4379-8B3C-855BA9341663}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WordCount", "Batch\WordCount\WordCount.csproj", "{17E4C27F-8441-425A-B82B-23BA5E313CC4}"
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "WordCountFSharp", "Batch\WordCountFSharp\WordCountFSharp.fsproj", "{17B63D32-EFC8-4EF5-831A-197A4FC29F06}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Kafka", "Streaming\Kafka\Kafka.csproj", "{8764EAAA-9D32-4549-A64F-C7C89B014EA6}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HiveDataFrame", "Sql\HiveDataFrame\HiveDataFrame.csproj", "{5C97498A-C4DB-43DD-86AD-4E50DEE8D405}"
@ -55,6 +57,10 @@ Global
{17E4C27F-8441-425A-B82B-23BA5E313CC4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{17E4C27F-8441-425A-B82B-23BA5E313CC4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{17E4C27F-8441-425A-B82B-23BA5E313CC4}.Release|Any CPU.Build.0 = Release|Any CPU
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Debug|Any CPU.Build.0 = Debug|Any CPU
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Release|Any CPU.ActiveCfg = Release|Any CPU
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Release|Any CPU.Build.0 = Release|Any CPU
{8764EAAA-9D32-4549-A64F-C7C89B014EA6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8764EAAA-9D32-4549-A64F-C7C89B014EA6}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8764EAAA-9D32-4549-A64F-C7C89B014EA6}.Release|Any CPU.ActiveCfg = Release|Any CPU
@ -74,6 +80,7 @@ Global
{DF840BFB-B3A3-493D-B667-4CF21ADBFCAD} = {6F90310A-2DA2-4E81-A062-8D8A9F47C25B}
{913E6A56-9839-4379-8B3C-855BA9341663} = {AE001E84-471E-4D02-BDDE-40B85915CEAE}
{17E4C27F-8441-425A-B82B-23BA5E313CC4} = {AE001E84-471E-4D02-BDDE-40B85915CEAE}
{17B63D32-EFC8-4EF5-831A-197A4FC29F06} = {AE001E84-471E-4D02-BDDE-40B85915CEAE}
{8764EAAA-9D32-4549-A64F-C7C89B014EA6} = {6F90310A-2DA2-4E81-A062-8D8A9F47C25B}
{5C97498A-C4DB-43DD-86AD-4E50DEE8D405} = {28600A86-E011-41C9-AB41-591580EDB9F1}
EndGlobalSection

View file

@ -38,11 +38,11 @@
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
</Reference>
<Reference Include="CSharpWorker, Version=1.6.1.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=1.6.1.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="log4net" version="2.0.5" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net452" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net452" />
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />

View file

@ -36,7 +36,7 @@
<ItemGroup>
<Reference Include="CSharpWorker, Version=1.5.2.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
</Reference>
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -44,7 +44,7 @@
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=1.5.2.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
</Reference>
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -65,7 +65,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe.config">
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe.config">
<Link>CSharpWorker.exe.config</Link>
</None>
<None Include="App.config" />

View file

@ -4,5 +4,5 @@
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
</packages>

View file

@ -36,7 +36,7 @@
<ItemGroup>
<Reference Include="CSharpWorker, Version=1.5.2.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
</Reference>
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -44,7 +44,7 @@
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=1.5.2.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
</Reference>
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -65,7 +65,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe.config">
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe.config">
<Link>CSharpWorker.exe.config</Link>
</None>
<None Include="App.config" />

View file

@ -4,5 +4,5 @@
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
</packages>

View file

@ -35,7 +35,7 @@
</PropertyGroup>
<ItemGroup>
<Reference Include="CSharpWorker">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
</Reference>
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -43,7 +43,7 @@
</Reference>
<Reference Include="Microsoft.CSharp" />
<Reference Include="Microsoft.Spark.CSharp.Adapter">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
</Reference>
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
@ -68,7 +68,7 @@
<Compile Include="EventPublisher.cs" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe.config">
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe.config">
<Link>CSharpWorker.exe.config</Link>
</None>
<None Include="App.config">

View file

@ -4,7 +4,7 @@
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
<!-- *** ****************************************************************** *** -->
<!-- *** Following references are needed for publishing events to EventHubs *** -->

View file

@ -38,7 +38,7 @@
</Reference>
<Reference Include="CSharpWorker">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
</Reference>
<Reference Include="log4net">
<SpecificVersion>False</SpecificVersion>
@ -46,7 +46,7 @@
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
</Reference>
<Reference Include="Razorvine.Pyrolite">
<SpecificVersion>False</SpecificVersion>
@ -64,7 +64,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe.config">
<None Include="..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe.config">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="App.config" />

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="log4net" version="2.0.5" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />

View file

@ -33,14 +33,14 @@
</PropertyGroup>
<ItemGroup>
<Reference Include="CSharpWorker">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\CSharpWorker.exe</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
</Reference>
<Reference Include="log4net, Version=1.2.10.0, Culture=neutral, PublicKeyToken=1b44e1d426115821, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
</Reference>
<Reference Include="Microsoft.Spark.CSharp.Adapter">
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
</Reference>
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="log4net" version="2.0.5" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100-PREVIEW-1" targetFramework="net45" />
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />

View file

@ -4,7 +4,7 @@ export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export XBUILDOPT=/verbosity:minimal
if [ $builduri = "" ];
if [ -z $builduri ];
then
export builduri=build.sh
fi
@ -15,7 +15,7 @@ export PROJ="$FWDIR/$PROJ_NAME.sln"
echo "===== Building $PROJ ====="
function error_exit() {
if [ "$STEP" = "" ];
if [ -z $STEP ];
then
export STEP=$CONFIGURATION
fi

View file

@ -3,8 +3,9 @@
## Prerequisites
* JDK 7 or above.
* Maven 3.3.3 or above.
* Maven 3.0.5 or above.
* Mono 4.2 stable or above. The download and installation instructions for Mono are available at [http://www.mono-project.com/download/#download-lin](http://www.mono-project.com/download/#download-lin).
* F# for Mono. The download and installation instructions for the F# Mono extension are available at [http://fsharp.org/use/linux/](http://fsharp.org/use/linux/)
* NuGet.
* XSLTPROC
@ -13,86 +14,9 @@ The following environment variables should be set properly:
* `JAVA_HOME`
## Instructions
* With `JAVA_HOME` set properly, navigate to [Mobius/build](../build) directory:
```
./build.sh
```
* Optional:
- Under [Mobius/scala](../scala) directory, run the following command to clean spark-clr*.jar built above:
```
mvn clean
```
- Under [Mobius/csharp](../csharp) directory, run the following command to clean the .NET binaries built above:
```
./clean.sh
```
[build.sh](../build/build.sh) prepares the following directories under `Mobius\build\runtime` after the build is done:
* **lib** ( `spark-clr*.jar` )
* **bin** ( `Microsoft.Spark.CSharp.Adapter.dll`, `CSharpWorker.exe`)
* **samples** ( The contents of `Mobius/csharp/Samples/Microsoft.Spark.CSharp/bin/Release/*`, including `Microsoft.Spark.CSharp.Adapter.dll`, `CSharpWorker.exe`, `SparkCLRSamples.exe`, `SparkCLRSamples.exe.Config` etc. )
* **scripts** ( `sparkclr-submit.sh` )
* **data** ( `Mobius/csharp/Samples/Microsoft.Spark.CSharp/data/*` )
# Running Samples
## Prerequisites
JDK is installed, and the following environment variables should be set properly:
* `JAVA_HOME`
## Running in Local mode
With `JAVA_HOME` set properly, navigate to [Mobius\build\localmode](../build/localmode) directory:
```
./run-samples.sh
```
It is **required** to run [build.sh](../build/build.sh) prior to running [run-samples.sh](../build/localmode/run-samples.sh).
[run-samples.sh](../build/localmode/run-samples.sh) downloads the version of Apache Spark referenced in the current branch, sets up `SPARK_HOME` environment variable, points `SPARKCLR_HOME` to `Mobius/build/runtime` directory created by [build.sh](../build/build.sh), and invokes [sparkclr-submit.sh](../scripts/sparkclr-submit.sh), with `spark.local.dir` set to `Mobius/build/runtime/Temp`.
A few more [run-samples.sh](../build/localmode/run-samples.sh) examples:
- To display all options supported by [run-samples.sh](../build/localmode/run-samples.sh):
```
run-samples.sh --help
```
- To run PiSample only:
```
run-samples.sh --torun pi*
```
- To run PiSample in verbose mode, with all logs displayed at console:
```
run-samples.sh --torun pi* --verbose
```
## Running in Standalone mode
```
sparkclr-submit.sh --verbose --master spark://host:port --exe SparkCLRSamples.exe $SPARKCLR_HOME/samples sparkclr.sampledata.loc hdfs://path/to/sparkclr/sampledata
```
- When option `--deploy-mode` is specified with `cluster`, option `--remote-sparkclr-jar` is required and needs to be specified with a valid file path of spark-clr*.jar on HDFS.
## Running in YARN mode
```
sparkclr-submit.sh --verbose --master yarn-cluster --exe SparkCLRSamples.exe $SPARKCLR_HOME/samples sparkclr.sampledata.loc hdfs://path/to/sparkclr/sampledata
```
Same as [instructions for Windows](windows-instructions.md#instructions) but use the following script files instead of .cmd files:
* build.sh
* clean.sh
# Running Unit Tests
@ -101,3 +25,9 @@ sparkclr-submit.sh --verbose --master yarn-cluster --exe SparkCLRSamples.exe $SP
./test.sh
```
# Running Samples
Same as [instructions for Windows](windows-instructions.md#running-samples) but using the following scripts instead of .cmd files:
* run-samples.sh
* sparkclr-submit.sh
Note that paths to files and syntax of the environment variables (like $SPARKCLR_HOME) will need to be updated for Linux when following the instructions for Windows.

View file

@ -2,27 +2,31 @@
The [release in GitHub](https://github.com/Microsoft/Mobius/releases) is a zip file. When you unzip that file, you will see a directory layout as follows:
````
|-- examples
|-- Example Mobius applications
|-- localmode
|-- Scripts for running samples and examples in local mode
|-- mobius-release-info.md
|-- runtime
|-- bin
|-- .NET binaries and its dependencies used by Mobius applications
|-- data
|-- Data files used by the [samples](..\csharp\Samples\Microsoft.Spark.CSharp)
|-- examples
|-- C# Spark driver [examples](..\examples) implemented using Mobius
|-- dependencies
|-- jar files Mobius depends on for functionality like CSV parsing, Kafka message processing etc.
|-- lib
|-- Mobius jar file
|-- samples
|-- C# Spark driver [samples](..\csharp\Samples\Microsoft.Spark.CSharp) for Mobius API
|-- scripts
|-- Mobius job submission scripts
|-- examples
|-- Example Mobius applications
|-- samples
|-- C# Spark driver samples for Mobius API
|-- data
|-- Data files used by the samples
````
You can run all the samples locally by invoking `localmode\RunSamples.cmd`. The script automatically downloads the Apache Spark distribution and runs the samples on your local machine. Note: the Apache Spark distribution is a download of more than 200 MB; `Runsamples.cmd` downloads it only once.
[Mobius examples](..\examples) may have external dependencies and may need configuration settings for those dependencies before they can be run.
Instructions on running a Mobius app are available at https://github.com/Microsoft/Mobius/blob/master/notes/running-mobius-app.md
Mobius samples do not have any external dependencies. The dependent jar files and data files used by the samples are included in the release. Instructions to run the samples are available at
* https://github.com/Microsoft/Mobius/blob/master/notes/windows-instructions.md#running-samples for Windows
* https://github.com/Microsoft/Mobius/blob/master/notes/linux-instructions.md#running-samples for Linux
Mobius examples under the "examples" folder may have external dependencies and may need configuration settings for those dependencies before they can be run. Refer to [Running Examples](https://github.com/Microsoft/Mobius/blob/master/notes/running-mobius-app.md#running-mobius-examples-in-local-mode) for details on how to run each example.
# NuGet Package
The packages published to [NuGet](https://www.nuget.org/packages/Microsoft.SparkCLR/) are primarily for reference when building Mobius applications. If Visual Studio is used for development, the reference to the NuGet package will go in the packages.config file.

View file

@ -8,6 +8,18 @@ The following software need to be installed and appropriate environment variable
|winutils.exe | see [Running Hadoop on Windows](https://wiki.apache.org/hadoop/WindowsProblems) for details |HADOOP_HOME |Spark on Windows needs this utility in the `%HADOOP_HOME%\bin` directory. It can be copied over from any Hadoop distribution. Alternatively, if you used [`RunSamples.cmd`](../csharp/Samples/Microsoft.Spark.CSharp/samplesusage.md) to run Mobius samples, you can find the `tools\winutils` directory (under the [`build`](../build) directory) that can be used as HADOOP_HOME |
|Mobius |[v1.5.200](https://github.com/Microsoft/Mobius/releases) or v1.6.100-PREVIEW-1 | SPARKCLR_HOME |If you downloaded a [Mobius release](https://github.com/Microsoft/Mobius/releases), SPARKCLR_HOME should be set to the directory named `runtime` (for example, `D:\downloads\spark-clr_2.10-1.5.200\runtime`). Alternatively, if you used [`RunSamples.cmd`](../csharp/Samples/Microsoft.Spark.CSharp/samplesusage.md) to run Mobius samples, you can find `runtime` directory (under [`build`](../build) directory) that can be used as SPARKCLR_HOME. **Note** - setting SPARKCLR_HOME is _optional_ and it is set by sparkclr-submit.cmd if not set. |
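
Setting the variables in the table above from a Developer Command Prompt could look like the following; the paths are hypothetical and should point at your own winutils and Mobius runtime locations:
```
@rem hypothetical paths - adjust to your environment
set HADOOP_HOME=C:\Mobius\build\tools\winutils
set SPARKCLR_HOME=C:\Mobius\build\runtime
```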
## Dependencies
Some features in Mobius depend on classes outside of Spark and Mobius. A selected set of jar files that Mobius depends on is available in the Mobius release under the "runtime\dependencies" folder. These jar files are used with the "--jars" parameter in Mobius (that is, sparkclr-submit.cmd) and get passed on to Spark (spark-submit.cmd).
The following table lists the Mobius features and their dependencies. The version numbers in the jar file names below are included only for completeness; a different version of a jar file may also work with Mobius.
|Mobius Feature | Dependencies |
|----|-----|
|Using CSV files with DataFrame API | <ul><li>spark-csv_2.10-1.3.0.jar</li><li>commons-csv-1.1.jar</li></ul> |
|Kafka message processing with DStream API | spark-streaming-kafka-assembly_2.10-1.6.1.jar |
Note that additional external jar files may need to be specified as dependencies for a Mobius application depending on the Mobius features used (like EventHubs event processing or using Hive). These jars are not included in the Mobius release under the "dependencies" folder.
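
For example, a local-mode sample run that passes the CSV dependencies from an unzipped release could look like this (the `c:\MobiusRelease` paths are placeholders for your release location):
```
sparkclr-submit.cmd --verbose --jars c:\MobiusRelease\dependencies\spark-csv_2.10-1.3.0.jar,c:\MobiusRelease\dependencies\commons-csv-1.1.jar --exe SparkCLRSamples.exe c:\MobiusRelease\samples sparkclr.sampledata.loc c:\MobiusRelease\samples\data
```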
## Windows Instructions
### Local Mode
To use Mobius with Spark available locally in a machine, navigate to `%SPARKCLR_HOME%\scripts` directory and run the following command
@ -116,7 +128,7 @@ The instructions above cover running Mobius applications in Windows. With the fo
| Type | Examples |
| ------------- |--------------|
| Batch | <ul><li>[Pi](#pi-example-batch)</li><li>[Word Count](#wordcount-example-batch)</li></ul> |
| SQL | <ul><li>[JDBC](#jdbc-example-sql)</li><li>[Spark-XML](#spark-xml-example-sql)</li></ul> |
| SQL | <ul><li>[JDBC](#jdbc-example-sql)</li><li>[Spark-XML](#spark-xml-example-sql)</li><li>[Hive](#hive-example-sql)</li></ul> |
| Streaming | <ul><li>[Kafka](#kafka-example-streaming)</li><li>[EventHubs](#eventhubs-example-streaming)</li><li>[HDFS Word Count](#hdfswordcount-example-streaming)</li></ul> |
The following sample commands show how to run Mobius examples in local mode. Using the instructions above, these commands can be tweaked to run in other modes.
@ -142,6 +154,11 @@ The schema and row count of the table name provided as the commandline argument
Displays the number of XML elements in the input XML file provided as the first argument to SparkClrXml.exe and writes the modified XML to the file specified in the second commandline argument.
### Hive Example (Sql)
* `sparkclr-submit.cmd --jars <jar files used for using Hive in Spark> --exe HiveDataFrame.exe C:\Git\Mobius\examples\Sql\HiveDataFrame\bin\Debug`
Reads data from a CSV file, creates a Hive table and reads data back from it.
### EventHubs Example (Streaming)
* Get the following jar files
* qpid-amqp-1-0-client-0.32.jar

View file

@ -6,7 +6,7 @@
* Developer Command Prompt for [Visual Studio](https://www.visualstudio.com/) 2013 or above, which comes with .NET Framework 4.5 or above. Note: [Visual Studio 2015 Community Edition](https://www.visualstudio.com/en-us/products/visual-studio-community-vs.aspx) is **FREE**.
* 64-bit JDK 7u85 or above; or, 64-bit JDK 8u60 or above. OpenJDK for Windows can be downloaded from [http://www.azul.com/downloads/zulu/zulu-windows/](http://www.azul.com/downloads/zulu/zulu-windows/); Oracle JDK8 for Windows is available at Oracle website.
JDK should be downloaded manually, and the following environment variables should be set properly in the Developer Command Prompt for Visual Studio:
The following environment variables should be set properly in the Developer Command Prompt for Visual Studio:
* `JAVA_HOME`
@ -40,16 +40,43 @@ JDK should be downloaded manually, and the following environment variables shoul
* **scripts** ( `sparkclr-submit.cmd` )
* **data** ( `Mobius\csharp\Samples\Microsoft.Spark.CSharp\data\*` )
# Running Unit Tests
* In Visual Studio: Install NUnit3 Test Adapter. Run the tests through "Test" -> "Run" -> "All Tests"
* Install NUnit Runner 3.0 or above using NuGet (see [https://www.nuget.org/packages/NUnit.Runners/](https://www.nuget.org/packages/NUnit.Runners/)). In Developer Command Prompt for VS, set `NUNITCONSOLE` to the path to nunit console, and navigate to `Mobius\csharp` and run the following command:
```
Test.cmd
```
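A hedged sketch of the console-runner steps described above, with a hypothetical NUnit console path and repo location:
```
@rem NUNITCONSOLE path is hypothetical - point it at your NUnit 3 console runner
set NUNITCONSOLE=C:\Tools\NUnit.Console-3.0\bin
cd /d C:\Git\Mobius\csharp
Test.cmd
```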
# Running Samples
Samples demonstrate comprehensive usage of the Mobius API and also serve as functional tests for the API. The following are the options for running samples:
* [Local mode](#running-in-local-mode)
* [Standalone cluster](#running-in-standalone-mode)
* [YARN cluster](#running-in-yarn-mode)
* [Local mode dev environment](#running-in-local-mode-dev-environment) (using artifacts built in the local Git repo)
## Prerequisites
JDK should be downloaded manually, and the following environment variables should be set properly in the Developer Command Prompt for Visual Studio:
* `JAVA_HOME`
The prerequisites for running Mobius samples are the same as those for running any other Mobius application. Refer to the [instructions](.\running-mobius-app.md#pre-requisites) for details. The [local mode dev environment](#running-in-local-mode-dev-environment) makes it easier to run samples in a dev environment by downloading Spark for you.
## Running in Local mode
```
sparkclr-submit.cmd --verbose --jars c:\MobiusRelease\dependencies\spark-csv_2.10-1.3.0.jar,c:\MobiusRelease\dependencies\commons-csv-1.1.jar --exe SparkCLRSamples.exe c:\MobiusRelease\samples sparkclr.sampledata.loc c:\MobiusRelease\samples\data
```
## Running in Standalone mode
```
sparkclr-submit.cmd --verbose --master spark://host:port --jars <hdfs path to spark-csv_2.10-1.3.0.jar,commons-csv-1.1.jar> --exe SparkCLRSamples.exe %SPARKCLR_HOME%\samples sparkclr.sampledata.loc hdfs://path/to/mobius/sampledata
```
- When option `--deploy-mode` is specified with `cluster`, option `--remote-sparkclr-jar` is required and needs to be specified with a valid file path of spark-clr*.jar on HDFS.
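For example, a cluster deploy-mode submission could look like the following (the HDFS locations and the jar version are placeholders):
```
sparkclr-submit.cmd --verbose --master spark://host:port --deploy-mode cluster --remote-sparkclr-jar hdfs://path/to/spark-clr_2.10-<version>.jar --jars <hdfs path to spark-csv_2.10-1.3.0.jar,commons-csv-1.1.jar> --exe SparkCLRSamples.exe %SPARKCLR_HOME%\samples sparkclr.sampledata.loc hdfs://path/to/mobius/sampledata
```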
## Running in YARN mode
```
sparkclr-submit.cmd --verbose --master yarn-cluster --jars <hdfs path to spark-csv_2.10-1.3.0.jar,commons-csv-1.1.jar> --exe SparkCLRSamples.exe %SPARKCLR_HOME%\samples sparkclr.sampledata.loc hdfs://path/to/mobius/sampledata
```
## Running in local mode dev environment
In the Developer Command Prompt for Visual Studio where `JAVA_HOME` is set properly, navigate to [Mobius\build](../build/) directory:
```
@ -78,25 +105,3 @@ A few more [RunSamples.cmd](../build/localmode/RunSamples.cmd) examples:
```
RunSamples.cmd --torun pi* --verbose
```
## Running in Standalone mode
```
sparkclr-submit.cmd --verbose --master spark://host:port --exe SparkCLRSamples.exe %SPARKCLR_HOME%\samples sparkclr.sampledata.loc hdfs://path/to/mobius/sampledata
```
- When option `--deploy-mode` is specified with `cluster`, option `--remote-sparkclr-jar` is required and needs to be specified with a valid file path of spark-clr*.jar on HDFS.
## Running in YARN mode
```
sparkclr-submit.cmd --verbose --master yarn-cluster --exe SparkCLRSamples.exe %SPARKCLR_HOME%\samples sparkclr.sampledata.loc hdfs://path/to/mobius/sampledata
```
# Running Unit Tests
* In Visual Studio: Install NUnit3 Test Adapter. Run the tests through "Test" -> "Run" -> "All Tests"
* Install NUnit Runner 3.0 or above using NuGet (see [https://www.nuget.org/packages/NUnit.Runners/](https://www.nuget.org/packages/NUnit.Runners/)). In Developer Command Prompt for VS, set `NUNITCONSOLE` to the path to nunit console, and navigate to `Mobius\csharp` and run the following command:
```
Test.cmd
```

View file

@ -2,22 +2,22 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.microsoft.sparkclr</groupId>
<artifactId>spark-clr_2.10</artifactId>
<version>1.6.1-SNAPSHOT</version>
<name>SparkCLR Project</name>
<version>1.6.101-SNAPSHOT</version>
<name>Mobius Project</name>
<description>C# language binding and extensions to Apache Spark</description>
<url>https://github.com/Microsoft/sparkclr</url>
<url>https://github.com/Microsoft/Mobius</url>
<inceptionYear>2015</inceptionYear>
<licenses>
<license>
<name>MIT License</name>
<url>https://github.com/Microsoft/SparkCLR/blob/master/LICENSE</url>
<url>https://github.com/Microsoft/Mobius/blob/master/LICENSE</url>
<distribution>repo</distribution>
</license>
</licenses>
<developers>
<developer>
<name>SparkCLR Team</name>
<name>Mobius Team</name>
<email>sparkclrdev@microsoft.com</email>
<organization>Microsoft</organization>
<organizationUrl>http://www.microsoft.com</organizationUrl>
@ -25,9 +25,9 @@
</developers>
<scm>
<connection>scm:git:git@github.com:Microsoft/SparkCLR.git</connection>
<developerConnection>scm:git:git@github.com:Microsoft/SparkCLR.git</developerConnection>
<url>git@github.com:Microsoft/SparkCLR.git</url>
<connection>scm:git:git@github.com:Microsoft/Mobius.git</connection>
<developerConnection>scm:git:git@github.com:Microsoft/Mobius.git</developerConnection>
<url>git@github.com:Microsoft/Mobius.git</url>
</scm>
<properties>

View file

@ -291,9 +291,7 @@ class DynamicPartitionKafkaInputDStream[
}
override def start(): Unit = {
if(refreshOffsetsScheduler == null) {
instantiateAndStartRefreshOffsetsScheduler
}
instantiateAndStartRefreshOffsetsScheduler
}
override def stop(): Unit = {

View file

@ -36,7 +36,7 @@ if "%SPARK_ASSEMBLY_JAR%"=="0" (
exit /b 1
)
if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.10-1.6.1-SNAPSHOT.jar)
if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.10-1.6.101-SNAPSHOT.jar)
echo SPARKCLR_JAR=%SPARKCLR_JAR%
set SPARKCLR_CLASSPATH=%SPARKCLR_HOME%\lib\%SPARKCLR_JAR%
REM SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode
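
Since the script sets `SPARKCLR_JAR` only when it is not already defined, a caller can override it before invoking the script, for example when using a differently versioned jar; the values below are illustrative:
```
@rem illustrative override; the jar name must match the file under %SPARKCLR_HOME%\lib
set SPARKCLR_JAR=spark-clr_2.10-1.6.101-SNAPSHOT.jar
call sparkclr-submit.cmd debug
```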

View file

@ -54,7 +54,7 @@ then
exit 1
fi
export SPARKCLR_JAR=spark-clr_2.10-1.6.1-SNAPSHOT.jar
export SPARKCLR_JAR=spark-clr_2.10-1.6.101-SNAPSHOT.jar
export SPARKCLR_CLASSPATH="$SPARKCLR_HOME/lib/$SPARKCLR_JAR"
# SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode
[ ! "$SPARKCLR_DEBUGMODE_EXT_JARS" = "" ] && export SPARKCLR_CLASSPATH="$SPARKCLR_CLASSPATH:$SPARKCLR_DEBUGMODE_EXT_JARS"