Dev/sutyag/upgrade mobius (#697)
* basic
* Add extractor and outputter
* Add reducer not done
* Add procedure
* kill node, broadcast, upload executable error feed to cosmos, specify avro or parquet syntax
* Add more functions to HDFS. Add submitter heartbeat. Update doc
* Redesign cosmos download, add replication setting for hdfs
* Improve executable runner to deal with bad lines
* MERGE MOBIUS
* Change dependency path
* Add registration method to mobius
* Major refactoring to add ISparkosmosModule to modularize everything. Start supporting streaming. Fixed a couple of Mobius bugs. Added integration tests. Re-enabled unit tests. Added DatedPath
* Make SparkContext settable, fix SetJobGroup
* Expose more interface from Mobius
* Mobius change for Spark 2.3
* Fix version conflict, remove unused files
* Added support for multiple UDFs
* Fixed non-SQL UDF issue
* 1. Upgrade Mobius to Spark 2.3.1  2. Fixed UDF bugs  3. Added support for multiple UDFs
* 1. Added sample test cases  2. Updated references for examples
* Removed stashed files
* Fixed review comments
* Fixed review comments
* Fixed failed unit test cases
* Deleting all the things
* Updated version in appveyor
* Updated tartool download path
* Fixed Java process terminate issue
* Revert access modifier to internal from public for JvmBridge
This commit is contained in:
Parent
09462fff7e
Commit
9aa97b98c6
@@ -1,4 +1,4 @@
-version: 2.0.2-SNAPSHOT.{build}
+version: 2.3.1-SNAPSHOT.{build}

 environment:
   securefile:

@@ -6,6 +6,8 @@ rem Copyright (c) Microsoft. All rights reserved.
 rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
 rem

+SET MAVEN_OPTS=-Dhttps.protocols=TLSv1,TLSv1.1,TLSv1.2
+
 if "%1" == "csharp" set buildCSharp=true

 SET CMDHOME=%~dp0
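The added MAVEN_OPTS line is presumably a build-environment fix rather than a feature: forcing the TLS 1.0/1.1/1.2 protocol set lets Maven on older JVMs keep downloading from the Apache distribution servers, which had been tightening their TLS requirements around this time.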
@@ -47,7 +47,7 @@ if "%precheck%" == "bad" (goto :EOF)
 @rem
 @rem setup Hadoop and Spark versions
 @rem
-set SPARK_VERSION=2.0.2
+set SPARK_VERSION=2.3.1
 set HADOOP_VERSION=2.6
 set APACHE_DIST_SERVER=archive.apache.org
 @echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION%, APACHE_DIST_SERVER=%APACHE_DIST_SERVER%

@@ -100,7 +100,7 @@ if "!USER_EXE!"=="" (
   call sparkclr-submit.cmd --conf spark.sql.warehouse.dir=%TEMP_DIR% %*
 )

-@if ERRORLEVEL 1 GOTO :ErrorStop
+@if ERRORLEVEL 2 GOTO :ErrorStop

 @GOTO :EOF

@@ -20,7 +20,7 @@ if ($stage.ToLower() -eq "run")
     $hadoopVersion = if ($envValue -eq $null) { "2.6" } else { $envValue }

     $envValue = [Environment]::GetEnvironmentVariable("SPARK_VERSION")
-    $sparkVersion = if ($envValue -eq $null) { "2.0.2" } else { $envValue }
+    $sparkVersion = if ($envValue -eq $null) { "2.3.1" } else { $envValue }

     Write-Output "[downloadtools] hadoopVersion=$hadoopVersion, sparkVersion=$sparkVersion, apacheDistServer=$apacheDistServer"
 }

@@ -16,7 +16,7 @@ do
 done

 # setup Hadoop and Spark versions
-export SPARK_VERSION=2.0.2
+export SPARK_VERSION=2.3.1
 export HADOOP_VERSION=2.6
 export APACHE_DIST_SERVER=archive.apache.org
 echo "[run-samples.sh] SPARK_VERSION=$SPARK_VERSION, HADOOP_VERSION=$HADOOP_VERSION, APACHE_DIST_SERVER=$APACHE_DIST_SERVER"
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
     <ProjectConfiguration Include="Debug|x64">
       <Configuration>Debug</Configuration>

@@ -20,13 +20,13 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v140</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v140</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>

@@ -35,16 +35,17 @@
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
     <DocumentationFile>..\documentation\Microsoft.Spark.CSharp.Adapter.Doc.XML</DocumentationFile>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <PropertyGroup>
     <StartupObject />
   </PropertyGroup>
   <ItemGroup>
-    <Reference Include="log4net">
-      <HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
+    <Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
+      <HintPath>..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
     </Reference>
-    <Reference Include="Newtonsoft.Json">
-      <HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
+    <Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
+      <HintPath>..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
     </Reference>
     <Reference Include="Razorvine.Pyrolite">
       <HintPath>..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>

@@ -98,6 +99,7 @@
     <Compile Include="Network\RioNative.cs" />
     <Compile Include="Network\RioSocketWrapper.cs" />
     <Compile Include="Network\SaeaSocketWrapper.cs" />
+    <Compile Include="Network\SocketInfo.cs" />
     <Compile Include="Network\SocketStream.cs" />
     <Compile Include="Network\SockDataToken.cs" />
     <Compile Include="Network\SocketFactory.cs" />

@@ -184,6 +186,7 @@
   <ItemGroup>
     <None Include="packages.config" />
   </ItemGroup>
+  <ItemGroup />
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <PropertyGroup>
     <PostBuildEvent>
@@ -3,6 +3,7 @@

 using System;
 using System.Collections.Generic;
+using Microsoft.Spark.CSharp.Network;

 namespace Microsoft.Spark.CSharp.Core
 {

@@ -11,6 +12,6 @@ namespace Microsoft.Spark.CSharp.Core
     /// </summary>
     interface IRDDCollector
     {
-        IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type);
+        IEnumerable<dynamic> Collect(SocketInfo info, SerializedMode serializedMode, Type type);
     }
 }
@@ -4,6 +4,7 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
+using Microsoft.Spark.CSharp.Network;
 using Microsoft.Spark.CSharp.Proxy;
 using Microsoft.Spark.CSharp.Services;

@@ -60,6 +61,7 @@ namespace Microsoft.Spark.CSharp.Core
            {
                return sparkContext;
            }
+            set { sparkContext = value; }
        }

        /// <summary>

@@ -592,13 +594,13 @@ namespace Microsoft.Spark.CSharp.Core
        /// <returns></returns>
        public T[] Collect()
        {
-            int port = RddProxy.CollectAndServe();
-            return Collect(port).Cast<T>().ToArray();
+            var info = RddProxy.CollectAndServe();
+            return Collect(info).Cast<T>().ToArray();
        }

-        internal IEnumerable<dynamic> Collect(int port)
+        internal IEnumerable<dynamic> Collect(SocketInfo info)
        {
-            return RddProxy.RDDCollector.Collect(port, serializedMode, typeof(T));
+            return RddProxy.RDDCollector.Collect(info, serializedMode, typeof(T));
        }

        /// <summary>

@@ -830,9 +832,9 @@ namespace Microsoft.Spark.CSharp.Core


                var mappedRDD = MapPartitionsWithIndex<T>(new TakeHelper<T>(left).Execute);
-                int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions);
+                var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions);

-                IEnumerable<T> res = Collect(port).Cast<T>();
+                IEnumerable<T> res = Collect(info).Cast<T>();

                items.AddRange(res);
                partsScanned += numPartsToTry;

@@ -925,7 +927,7 @@ namespace Microsoft.Spark.CSharp.Core
        /// <returns></returns>
        public RDD<T> Repartition(int numPartitions)
        {
-            return new RDD<T>(RddProxy.Repartition(numPartitions), sparkContext);
+            return new RDD<T>(RddProxy.Repartition(numPartitions), sparkContext, serializedMode);
        }

        /// <summary>

@@ -942,7 +944,7 @@ namespace Microsoft.Spark.CSharp.Core
        /// <returns></returns>
        public RDD<T> Coalesce(int numPartitions, bool shuffle = false)
        {
-            return new RDD<T>(RddProxy.Coalesce(numPartitions, shuffle), sparkContext);
+            return new RDD<T>(RddProxy.Coalesce(numPartitions, shuffle), sparkContext, serializedMode);
        }

        /// <summary>

@@ -1065,8 +1067,8 @@ namespace Microsoft.Spark.CSharp.Core
            foreach (int partition in Enumerable.Range(0, GetNumPartitions()))
            {
                var mappedRDD = MapPartitionsWithIndex<T>((pid, iter) => iter);
-                int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1));
-                foreach (T row in Collect(port))
+                var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1));
+                foreach (T row in Collect(info))
                    yield return row;
            }
        }
@@ -11,6 +11,7 @@ using System.Runtime.Serialization.Formatters.Binary;
 using System.Text;
 using Microsoft.Spark.CSharp.Interop.Ipc;
 using Microsoft.Spark.CSharp.Network;
+using Microsoft.Spark.CSharp.Services;
 using Microsoft.Spark.CSharp.Sql;

 namespace Microsoft.Spark.CSharp.Core

@@ -20,14 +21,31 @@ namespace Microsoft.Spark.CSharp.Core
    /// </summary>
    class RDDCollector : IRDDCollector
    {
-        public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
+        private static ILoggerService logger;
+        private static ILoggerService Logger
+        {
+            get
+            {
+                if (logger != null) return logger;
+                logger = LoggerServiceFactory.GetLogger(typeof(RDDCollector));
+                return logger;
+            }
+        }
+
+        public IEnumerable<dynamic> Collect(SocketInfo info, SerializedMode serializedMode, Type type)
        {
            IFormatter formatter = new BinaryFormatter();
            var sock = SocketFactory.CreateSocket();
-            sock.Connect(IPAddress.Loopback, port);
+            sock.Connect(IPAddress.Loopback, info.Port, null);

            using (var s = sock.GetStream())
            {
+                if (info.Secret != null)
+                {
+                    SerDe.Write(s, info.Secret);
+                    var reply = SerDe.ReadString(s);
+                    Logger.LogDebug("Connect back to JVM: " + reply);
+                }
                byte[] buffer;
                while ((buffer = SerDe.ReadBytes(s)) != null && buffer.Length > 0)
                {
@@ -36,7 +36,7 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
            if (!sockets.TryDequeue(out socket))
            {
                socket = SocketFactory.CreateSocket();
-                socket.Connect(IPAddress.Loopback, portNumber);
+                socket.Connect(IPAddress.Loopback, portNumber, null);
            }
            return socket;
        }
@@ -12,12 +12,12 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
    /// Reference to object created in JVM
    /// </summary>
    [Serializable]
-    internal class JvmObjectReference
+    public class JvmObjectReference
    {
        public string Id { get; private set; }
        private DateTime creationTime;

-        public JvmObjectReference(string jvmReferenceId)
+        internal JvmObjectReference(string jvmReferenceId)
        {
            Id = jvmReferenceId;
            creationTime = DateTime.UtcNow;

@@ -48,6 +48,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
            return base.GetHashCode();
        }

+        public string ObjectToString()
+        {
+            return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "toString").ToString();
+        }
+
        public string GetDebugInfo()
        {
            var javaObjectReferenceForClassObject = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "getClass").ToString());
@@ -31,6 +31,8 @@ namespace Microsoft.Spark.CSharp.Interop
            }
        }

+        internal static IJvmBridge JvmBridge => SparkCLRIpcProxy.JvmBridge;
+
        internal static IConfigurationService configurationService;

        internal static IConfigurationService ConfigurationService
@@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network
    /// ByteBuf delimits a section of a ByteBufChunk.
    /// It is the smallest unit to be allocated.
    /// </summary>
-    internal class ByteBuf
+    public class ByteBuf
    {
        private int readerIndex;
        private int writerIndex;
@@ -2,9 +2,12 @@
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.

 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Net;
 using System.Net.Sockets;
+using System.Text;
+using System.Threading;
 using Microsoft.Spark.CSharp.Configuration;
 using Microsoft.Spark.CSharp.Services;

@@ -63,12 +66,30 @@ namespace Microsoft.Spark.CSharp.Network
        /// </summary>
        /// <param name="remoteaddr">The IP address of the remote host</param>
        /// <param name="port">The port number of the remote host</param>
-        public void Connect(IPAddress remoteaddr, int port)
+        public void Connect(IPAddress remoteaddr, int port, string secret)
        {
            var remoteEndPoint = new IPEndPoint(remoteaddr, port);
            innerSocket.Connect(remoteEndPoint);
        }

+        private static byte[] ReceiveAll(Socket socket, int len)
+        {
+            var buffer = new List<byte>();
+
+            while (socket.Available > 0 && buffer.Count < len)
+            {
+                var currByte = new Byte[1];
+                var byteCounter = socket.Receive(currByte, currByte.Length, SocketFlags.None);
+
+                if (byteCounter.Equals(1))
+                {
+                    buffer.Add(currByte[0]);
+                }
+            }
+
+            return buffer.ToArray();
+        }
+
        /// <summary>
        /// Returns the NetworkStream used to send and receive data.
        /// </summary>
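Note that this Connect accepts the new secret argument without acting on it, and the same is true of the Rio and Saea implementations further down. The authentication handshake is left to the caller, as in RDDCollector.Collect above, which writes the secret to the stream and reads back the JVM's acknowledgment after connecting.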
@@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network
    /// ISocketWrapper interface defines the common methods to operate a socket (traditional socket or
    /// Windows Registered IO socket)
    /// </summary>
-    internal interface ISocketWrapper : IDisposable
+    public interface ISocketWrapper : IDisposable
    {
        /// <summary>
        /// Accepts a incoming connection request.

@@ -29,7 +29,8 @@ namespace Microsoft.Spark.CSharp.Network
        /// </summary>
        /// <param name="remoteaddr">The IP address of the remote host</param>
        /// <param name="port">The port number of the remote host</param>
-        void Connect(IPAddress remoteaddr, int port);
+        /// <param name="secret">The secret to connect, can be null</param>
+        void Connect(IPAddress remoteaddr, int port, string secret);

        /// <summary>
        /// Returns a stream used to send and receive data.
@@ -151,7 +151,7 @@ namespace Microsoft.Spark.CSharp.Network
        /// </summary>
        /// <param name="remoteaddr">The IP address of the remote host</param>
        /// <param name="port">The port number of the remote host</param>
-        public void Connect(IPAddress remoteaddr, int port)
+        public void Connect(IPAddress remoteaddr, int port, string secret)
        {
            EnsureAccessible();

@@ -111,7 +111,7 @@ namespace Microsoft.Spark.CSharp.Network
        /// </summary>
        /// <param name="remoteaddr">The IP address of the remote host</param>
        /// <param name="port">The port number of the remote host</param>
-        public void Connect(IPAddress remoteaddr, int port)
+        public void Connect(IPAddress remoteaddr, int port, string secret)
        {
            var remoteEndPoint = new IPEndPoint(remoteaddr, port);
            innerSocket.Connect(remoteEndPoint);
@@ -0,0 +1,28 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Microsoft.Spark.CSharp.Interop.Ipc;
+
+namespace Microsoft.Spark.CSharp.Network
+{
+    public class SocketInfo
+    {
+        public readonly int Port;
+        public readonly string Secret;
+
+        public SocketInfo(int port, string secret)
+        {
+            Port = port;
+            Secret = secret;
+        }
+
+        public static SocketInfo Parse(object o)
+        {
+            var oo = o as List<JvmObjectReference>;
+            if (oo == null) throw new Exception(o.ToString() + " is not socket info " + typeof(List<JvmObjectReference>) + " " + o.GetType());
+            return new SocketInfo(int.Parse(oo[0].ObjectToString()), oo[1].ObjectToString());
+        }
+    }
+}
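SocketInfo mirrors a Spark 2.3 change: PythonRDD.collectAndServe and PythonRDD.runJob now hand back a (port, secret) pair instead of a bare port, and the client must authenticate before data is served. A rough sketch of how the pieces in this diff compose on a collect (error handling omitted; rddProxy stands in for the proxy objects):

    // Sketch only: the JVM serves partition data on a loopback socket and
    // expects the shared secret to be echoed back before it streams any bytes.
    SocketInfo info = rddProxy.CollectAndServe();       // parsed via SocketInfo.Parse
    var sock = SocketFactory.CreateSocket();
    sock.Connect(IPAddress.Loopback, info.Port, null);  // wrappers ignore the secret...
    using (var s = sock.GetStream())
    {
        if (info.Secret != null)
        {
            SerDe.Write(s, info.Secret);                // ...so the caller authenticates,
            var reply = SerDe.ReadString(s);            // then reads the acknowledgment
        }
        // SerDe.ReadBytes(s) now yields the pickled rows, as in RDDCollector.Collect
    }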
@@ -13,7 +13,7 @@ namespace Microsoft.Spark.CSharp.Proxy
        IRDDProxy JavaToCSharp();
        string GetQueryExecution();
        string GetExecutedPlan();
-        string GetShowString(int numberOfRows, bool truncate);
+        string GetShowString(int numberOfRows, int truncate, bool vertical);
        bool IsLocal();
        IStructTypeProxy GetSchema();
        IRDDProxy ToJSON();

@@ -59,6 +59,8 @@ namespace Microsoft.Spark.CSharp.Proxy
        IDataFrameProxy Repartition(int numPartitions, IColumnProxy[] columns);
        IDataFrameProxy Repartition(IColumnProxy[] columns);
        IDataFrameProxy Sample(bool withReplacement, double fraction, long seed);
+        IDataFrameProxy Broadcast();
+
        IDataFrameWriterProxy Write();
    }

@@ -7,6 +7,7 @@ using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using Microsoft.Spark.CSharp.Core;
+using Microsoft.Spark.CSharp.Network;

 namespace Microsoft.Spark.CSharp.Proxy
 {

@@ -41,6 +42,6 @@ namespace Microsoft.Spark.CSharp.Proxy
        void SaveAsSequenceFile(string path, string compressionCodecClass);
        void SaveAsTextFile(string path, string compressionCodecClass);
        long Count();
-        int CollectAndServe();
+        SocketInfo CollectAndServe();
    }
 }
@@ -8,6 +8,7 @@ using System.Text;
 using System.Threading.Tasks;
 using Microsoft.Spark.CSharp.Core;
 using Microsoft.Spark.CSharp.Interop;
+using Microsoft.Spark.CSharp.Network;


 namespace Microsoft.Spark.CSharp.Proxy

@@ -50,7 +51,7 @@ namespace Microsoft.Spark.CSharp.Proxy
        void CancelJobGroup(string groupId);
        void CancelAllJobs();
        IStatusTrackerProxy StatusTracker { get; }
-        int RunJob(IRDDProxy rdd, IEnumerable<int> partitions);
+        SocketInfo RunJob(IRDDProxy rdd, IEnumerable<int> partitions);
        IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId);
        IRDDProxy CreateCSharpRdd(IRDDProxy prefvJavaRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservePartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator);
        IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions, long partitionFuncId);
@@ -79,12 +79,12 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
            return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(executedPlanReference, "toString", new object[] { }).ToString();
        }

-        public string GetShowString(int numberOfRows, bool truncate)
+        public string GetShowString(int numberOfRows, int truncate, bool vertical)
        {
            return
                SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
                    jvmDataFrameReference, "showString",
-                    new object[] { numberOfRows, truncate }).ToString();
+                    new object[] { numberOfRows, truncate, vertical }).ToString();
        }

        public bool IsLocal()

@@ -575,6 +575,15 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
                    new object[] { withReplacement, fraction, seed }).ToString()), sqlContextProxy);
        }

+        public IDataFrameProxy Broadcast()
+        {
+            return
+                new DataFrameIpcProxy(
+                    new JvmObjectReference(
+                        SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.functions", "broadcast",
+                        new object[] { jvmDataFrameReference }).ToString()), sqlContextProxy);
+        }
+
        public IDataFrameWriterProxy Write()
        {
            return new DataFrameWriterIpcProxy(new JvmObjectReference(
@@ -12,6 +12,7 @@ using System.Threading.Tasks;
 using Microsoft.Spark.CSharp.Core;
 using Microsoft.Spark.CSharp.Interop;
 using Microsoft.Spark.CSharp.Interop.Ipc;
+using Microsoft.Spark.CSharp.Network;

 namespace Microsoft.Spark.CSharp.Proxy.Ipc
 {

@@ -66,10 +67,10 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
            return long.Parse(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(rdd, "count").ToString());
        }

-        public int CollectAndServe()
+        public SocketInfo CollectAndServe()
        {
            var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "rdd"));
-            return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd }).ToString());
+            return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd }));
        }

@@ -11,6 +11,7 @@ using System.Threading.Tasks;
 using Microsoft.Spark.CSharp.Core;
 using Microsoft.Spark.CSharp.Interop;
 using Microsoft.Spark.CSharp.Interop.Ipc;
+using Microsoft.Spark.CSharp.Network;
 using Microsoft.Spark.CSharp.Proxy.Ipc;

 namespace Microsoft.Spark.CSharp.Proxy.Ipc

@@ -134,10 +135,8 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc

        public void Accumulator(int port)
        {
-            jvmAccumulatorReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "accumulator",
-                SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList"),
-                SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorParam", IPAddress.Loopback.ToString(), port)
-                ));
+            jvmAccumulatorReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorV2", IPAddress.Loopback.ToString(), port);
+            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkContextReference, "register", new object[] { jvmAccumulatorReference });
        }

        public void Stop()

@@ -241,7 +240,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc

        public void SetJobGroup(string groupId, string description, bool interruptOnCancel)
        {
-            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setCheckpointDir", new object[] { groupId, description, interruptOnCancel });
+            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setJobGroup", new object[] { groupId, description, interruptOnCancel });
        }

        public void SetLocalProperty(string key, string value)

@@ -344,10 +343,10 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc

        }

-        public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
+        public SocketInfo RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
        {
            var jpartitions = JvmBridgeUtils.GetJavaList<int>(partitions);
-            return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions }).ToString());
+            return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions }));
        }

        public IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId)
@@ -27,6 +27,8 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
            }
        }

+        internal JvmObjectReference JvmReference => jvmSparkSessionReference;
+
        public ISqlContextProxy SqlContextProxy
        {
            get { return sqlContextProxy; }
@@ -106,7 +106,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc

            var udf = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.execution.python.UserDefinedPythonFunction", new object[]
            {
-                name, function, dt
+                name, function, dt, 100 /*BatchUDF*/, true /*deterministic*/
            });

            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(judf, "registerPython", new object[] { name, udf });
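For context: in Spark 2.3 the UserDefinedPythonFunction constructor takes a Python eval type and a determinism flag in addition to the name, function, and return type. The literal 100 corresponds to PythonEvalType.SQL_BATCHED_UDF on the JVM side, i.e. a plain row-batched (non-Arrow) UDF, and true marks it deterministic.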
@@ -6,7 +6,9 @@ using System.Collections.Generic;
 using System.Globalization;
 using System.Linq;
 using Microsoft.Spark.CSharp.Core;
+using Microsoft.Spark.CSharp.Interop.Ipc;
 using Microsoft.Spark.CSharp.Proxy;
+using Microsoft.Spark.CSharp.Proxy.Ipc;
 using Microsoft.Spark.CSharp.Services;

 namespace Microsoft.Spark.CSharp.Sql

@@ -66,6 +68,8 @@ namespace Microsoft.Spark.CSharp.Sql
            }
        }

+        internal JvmObjectReference JvmReference => (dataFrameProxy as DataFrameIpcProxy)?.JvmDataFrameReference;
+
        /// <summary>
        /// Returns true if the collect and take methods can be run locally (without any Spark executors).
        /// </summary>

@@ -145,10 +149,11 @@ namespace Microsoft.Spark.CSharp.Sql
        /// </summary>
        /// <param name="numberOfRows">Number of rows to display - default 20</param>
        /// <param name="truncate">Indicates if strings more than 20 characters long will be truncated</param>
-        public void Show(int numberOfRows = 20, bool truncate = true)
+        /// <param name="vertical">If set to True, print output rows vertically (one line per column value).</param>
+        public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false)
        {
            logger.LogInfo("Writing {0} rows in the DataFrame to Console output", numberOfRows);
-            Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate));
+            Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate, vertical));
        }

        /// <summary>
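Show's truncate parameter is now an int column width rather than a bool, matching Dataset.showString in Spark 2.3. A usage sketch, assuming a DataFrame df is in scope (the zero-disables-truncation behavior is the upstream convention):

    df.Show();              // 20 rows, cell values truncated to 20 characters
    df.Show(10, 0);         // 10 rows, untruncated (0 disables truncation upstream)
    df.Show(5, 30, true);   // vertical layout: one line per column value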
@@ -166,8 +171,8 @@ namespace Microsoft.Spark.CSharp.Sql
        /// </summary>
        public IEnumerable<Row> Collect()
        {
-            int port = RddProxy.CollectAndServe();
-            return Rdd.Collect(port).Cast<Row>();
+            var info = RddProxy.CollectAndServe();
+            return Rdd.Collect(info).Cast<Row>();
        }

        //TODO - add this method if needed to convert Row to collection of T

@@ -917,10 +922,11 @@ namespace Microsoft.Spark.CSharp.Sql
        /// <summary>
        /// Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
        /// </summary>
+        /// <param name="type">Persist storage type</param>
        // Python API: https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py persist(self, storageLevel)
-        public DataFrame Persist()
+        public DataFrame Persist(StorageLevelType type = StorageLevelType.MEMORY_AND_DISK)
        {
-            dataFrameProxy.Persist(StorageLevelType.MEMORY_AND_DISK);
+            dataFrameProxy.Persist(type);
            return this;
        }

@@ -944,6 +950,11 @@ namespace Microsoft.Spark.CSharp.Sql
            return Persist();
        }

+        public DataFrame Broadcast()
+        {
+            return new DataFrame(dataFrameProxy.Broadcast(), sparkContext);
+        }
+
        /// <summary>
        /// Returns a new DataFrame that has exactly `numPartitions` partitions.
        /// </summary>
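Persist now accepts an explicit storage level, and Broadcast wraps org.apache.spark.sql.functions.broadcast to hint a broadcast join. A sketch under assumed names (df, lookup, and the join column are placeholders, not from this diff):

    var cached = df.Persist(StorageLevelType.MEMORY_ONLY);   // default remains MEMORY_AND_DISK
    var joined = cached.Join(lookup.Broadcast(), "id");      // hint: hash-join against the small side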
@@ -159,5 +159,18 @@ namespace Microsoft.Spark.CSharp.Sql
            logger.LogInfo("Constructing DataFrame using Parquet source {0}", string.Join(";", path));
            return new DataFrame(dataFrameReaderProxy.Parquet(path), sparkContext);
        }
+
+        /// <summary>
+        /// Loads an AVRO file (one object per line) and returns the result as a DataFrame.
+        ///
+        /// This function goes through the input once to determine the input schema. If you know the
+        /// schema in advance, use the version that specifies the schema to avoid the extra scan.
+        /// </summary>
+        /// <param name="path">input path</param>
+        public DataFrame Avro(string path)
+        {
+            logger.LogInfo("Constructing DataFrame using AVRO source {0}", path);
+            return Format("com.databricks.spark.avro").Load(path);
+        }
    }
 }

@@ -170,5 +170,16 @@ namespace Microsoft.Spark.CSharp.Sql
        {
            Format("parquet").Save(path);
        }
+
+        /// <summary>
+        /// Saves the content of the DataFrame in AVRO format at the specified path.
+        /// This is equivalent to:
+        /// Format("com.databricks.spark.avro").Save(path)
+        /// </summary>
+        public void Avro(string path)
+        {
+            Format("com.databricks.spark.avro").Save(path);
+        }
    }
 }
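A round-trip sketch for the new Avro entry points; it assumes the Databricks spark-avro package is on the JVM classpath (for example, submitted via --packages com.databricks:spark-avro_2.11:4.0.0) and that sqlContext and df are in scope:

    var df = sqlContext.Read().Avro("hdfs:///data/events.avro");   // one pass over the input to infer the schema
    df.Write().Avro("hdfs:///data/events-out");                    // same as Format("com.databricks.spark.avro").Save(...)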
@@ -92,7 +92,8 @@ namespace Microsoft.Spark.CSharp.Sql
        /// </summary>
        /// <param name="numberOfRows">Number of rows - default is 20</param>
        /// <param name="truncate">Indicates if rows with more than 20 characters to be truncated</param>
-        public void Show(int numberOfRows = 20, bool truncate = true)
+        /// <param name="vertical">If set to true, prints output rows vertically (one line per column value).</param>
+        public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false)
        {
            ToDF().Show(numberOfRows, truncate);
        }
@@ -4,6 +4,8 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
+using System.Reflection;
+using System.Runtime.Serialization;
 using System.Text;
 using System.Threading.Tasks;

@@ -1119,5 +1121,42 @@ namespace Microsoft.Spark.CSharp.Sql
            return input.Select(a => func((A1)(a[0]), (A2)(a[1]), (A3)(a[2]), (A4)(a[3]), (A5)(a[4]), (A6)(a[5]), (A7)(a[6]), (A8)(a[7]), (A9)(a[8]), (A10)(a[9]))).Cast<dynamic>();
        }
    }
+
+    [Serializable]
+    internal class UdfReflectionHelper
+    {
+        private readonly MethodInfo func;
+
+        [NonSerialized]
+        private object[] _cache;
+
+        internal UdfReflectionHelper(MethodInfo f)
+        {
+            func = f;
+            _cache = new object[func.GetParameters().Length];
+        }
+
+        public Type ReturnType => func.ReturnType;
+
+        [OnDeserialized()]
+        public void Init(StreamingContext context)
+        {
+            _cache = new object[func.GetParameters().Length];
+        }
+
+        internal IEnumerable<dynamic> Execute(int pid, IEnumerable<dynamic> input)
+        {
+            return input.Select(Run).Cast<dynamic>();
+        }
+
+        private dynamic Run(dynamic input)
+        {
+            for (int i = 0; i < _cache.Length; ++i)
+            {
+                _cache[i] = input[i];
+            }
+            return func.Invoke(null, _cache);
+        }
+    }
    #endregion
 }
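UdfReflectionHelper lets a UDF be registered from a MethodInfo instead of a strongly typed Func<>, pairing with the RegisterFunction(string, MethodInfo) overload added to SqlContext at the end of this diff. A hedged sketch; MyUdfs and the surrounding session objects are placeholders:

    public static class MyUdfs
    {
        public static int StrLen(string s) { return s == null ? 0 : s.Length; }
    }

    // Reflection-based registration via the new overload:
    sqlContext.RegisterFunction("strLen", typeof(MyUdfs).GetMethod("StrLen"));
    sqlContext.Sql("SELECT strLen(name) FROM people").Show();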
@@ -18,6 +18,8 @@ namespace Microsoft.Spark.CSharp.Sql
        [NonSerialized]
        private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(Row));

+        public abstract dynamic[] Values { get; }
+
        /// <summary>
        /// Number of elements in the Row.
        /// </summary>

@@ -29,6 +31,11 @@ namespace Microsoft.Spark.CSharp.Sql
        /// </summary>
        public abstract StructType GetSchema();

+        public virtual void ResetValues(dynamic[] values)
+        {
+            throw new NotImplementedException();
+        }
+
        /// <summary>
        /// Returns the value at position i.
        /// </summary>

@@ -80,8 +87,22 @@ namespace Microsoft.Spark.CSharp.Sql
    internal class RowImpl : Row
    {
        private readonly StructType schema;
-        public dynamic[] Values { get { return values; } }
-        private readonly dynamic[] values;
+
+        public override dynamic[] Values
+        {
+            get
+            {
+                if (!valuesConverted)
+                {
+                    schema.ConvertPickleObjects(rawValues, rawValues);
+                    valuesConverted = true;
+                }
+                return rawValues;
+            }
+        }
+
+        private dynamic[] rawValues;
+        private bool valuesConverted = false;

        private readonly int columnCount;


@@ -96,11 +117,11 @@ namespace Microsoft.Spark.CSharp.Sql
        {
            if (data is dynamic[])
            {
-                values = data as dynamic[];
+                rawValues = data as dynamic[];
            }
            else if (data is List<dynamic>)
            {
-                values = (data as List<dynamic>).ToArray();
+                rawValues = (data as List<dynamic>).ToArray();
            }
            else
            {

@@ -109,14 +130,22 @@ namespace Microsoft.Spark.CSharp.Sql

            this.schema = schema;

-            columnCount = values.Count();
-            int schemaColumnCount = this.schema.Fields.Count();
+            columnCount = rawValues.Length;
+            int schemaColumnCount = this.schema.Fields.Count;
            if (columnCount != schemaColumnCount)
            {
                throw new Exception(string.Format("column count inferred from data ({0}) and schema ({1}) mismatch", columnCount, schemaColumnCount));
            }
+        }

-            Initialize();
+        public override void ResetValues(dynamic[] values)
+        {
+            if (columnCount != values.Length)
+            {
+                throw new ArgumentException("column count inferred from data and schema mismatch");
+            }
+            rawValues = values;
+            valuesConverted = false;
        }

        public override int Size()

@@ -131,16 +160,15 @@ namespace Microsoft.Spark.CSharp.Sql

        public override dynamic Get(int i)
        {
+            if (i >= 0 && i < columnCount) return Values[i];
            if (i >= columnCount)
            {
                throw new Exception(string.Format("i ({0}) >= columnCount ({1})", i, columnCount));
            }
-            else if(i < 0)
+            else
            {
                throw new Exception(string.Format("i ({0}) < 0", i));
            }
-
-            return values[i];
        }

        public override dynamic Get(string columnName)

@@ -152,7 +180,7 @@ namespace Microsoft.Spark.CSharp.Sql
        public override string ToString()
        {
            List<string> cols = new List<string>();
-            foreach (var item in values)
+            foreach (var item in Values)
            {
                if (item != null)
                {

@@ -167,72 +195,6 @@ namespace Microsoft.Spark.CSharp.Sql
            return string.Format("[{0}]", string.Join(",", cols.ToArray()));
        }
-
-        private void Initialize()
-        {
-
-            int index = 0;
-            foreach (var field in schema.Fields)
-            {
-                if (field.DataType is ArrayType)
-                {
-                    Func<DataType, int, StructType> convertArrayTypeToStructTypeFunc = (dataType, length) =>
-                    {
-                        StructField[] fields = new StructField[length];
-                        for(int i = 0; i < length ; i++)
-                        {
-                            fields[i] = new StructField(string.Format("_array_{0}", i), dataType);
-                        }
-                        return new StructType(fields);
-                    };
-                    var elementType = (field.DataType as ArrayType).ElementType;
-
-                    // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
-                    // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList;
-                    // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[].
-                    object[] valueOfArray = values[index] is ArrayList ? (values[index] as ArrayList).ToArray() : values[index] as object[];
-                    if (valueOfArray == null)
-                    {
-                        throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
-                    }
-
-                    values[index] = new RowImpl(valueOfArray, elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).values;
-                }
-                else if (field.DataType is MapType)
-                {
-                    //TODO
-                    throw new NotImplementedException();
-                }
-                else if (field.DataType is StructType)
-                {
-                    dynamic value = values[index];
-                    if (value != null)
-                    {
-                        var subRow = new RowImpl(values[index], field.DataType as StructType);
-                        values[index] = subRow;
-                    }
-                }
-                else if (field.DataType is DecimalType)
-                {
-                    //TODO
-                    throw new NotImplementedException();
-                }
-                else if (field.DataType is DateType)
-                {
-                    //TODO
-                    throw new NotImplementedException();
-                }
-                else if (field.DataType is StringType)
-                {
-                    if (values[index] != null) values[index] = values[index].ToString();
-                }
-                else
-                {
-                    values[index] = values[index];
-                }
-                index++;
-            }
-        }
    }

 }
@@ -86,7 +86,7 @@ namespace Microsoft.Spark.CSharp.Sql
            int i = 0;
            foreach (var argument in arguments)
            {
-                if (argument != null && argument.GetType() == typeof(RowConstructor))
+                if (argument is RowConstructor)
                {
                    values[i++] = (argument as RowConstructor).Values;
                }
@ -9,7 +9,9 @@ using System.Runtime.Remoting.Contexts;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using Microsoft.Spark.CSharp.Core;
|
using Microsoft.Spark.CSharp.Core;
|
||||||
|
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||||
using Microsoft.Spark.CSharp.Proxy;
|
using Microsoft.Spark.CSharp.Proxy;
|
||||||
|
using Microsoft.Spark.CSharp.Proxy.Ipc;
|
||||||
using Microsoft.Spark.CSharp.Services;
|
using Microsoft.Spark.CSharp.Services;
|
||||||
using Microsoft.Spark.CSharp.Sql.Catalog;
|
using Microsoft.Spark.CSharp.Sql.Catalog;
|
||||||
|
|
||||||
|
@ -42,6 +44,8 @@ namespace Microsoft.Spark.CSharp.Sql
|
||||||
get { return catalog ?? (catalog = new Catalog.Catalog(SparkSessionProxy.GetCatalog())); }
|
get { return catalog ?? (catalog = new Catalog.Catalog(SparkSessionProxy.GetCatalog())); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
internal JvmObjectReference JvmReference => (sparkSessionProxy as SparkSessionIpcProxy)?.JvmReference;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Interface through which the user may access the underlying SparkContext.
|
/// Interface through which the user may access the underlying SparkContext.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -114,7 +118,19 @@ namespace Microsoft.Spark.CSharp.Sql
|
||||||
// The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]].
|
// The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]].
|
||||||
// In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside.
|
// In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside.
|
||||||
// It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]].
|
// It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]].
|
||||||
var rddRow = rdd.Map(r => r);
|
var rddRow = rdd.MapPartitions(r => r.Select(rr => rr));
|
||||||
|
rddRow.serializedMode = SerializedMode.Row;
|
||||||
|
|
||||||
|
return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DataFrame CreateDataFrame(RDD<Row> rdd, StructType schema)
|
||||||
|
{
|
||||||
|
// Note: This is for pickling RDD, convert to RDD<byte[]> which happens in CSharpWorker.
|
||||||
|
// The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]].
|
||||||
|
// In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside.
|
||||||
|
// It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]].
|
||||||
|
var rddRow = rdd.MapPartitions(rows => rows.Select(r => r.Values));
|
||||||
rddRow.serializedMode = SerializedMode.Row;
|
rddRow.serializedMode = SerializedMode.Row;
|
||||||
|
|
||||||
return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext);
|
return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext);
|
||||||
|
|
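With the new overload, a typed RDD<Row> can be handed to the session directly; each row's Values array is what actually gets pickled. A usage sketch, assuming an existing SparkSession `session` and an RDD<Row> `rowRdd` whose rows match the schema (variable names are illustrative; StructField/StructType follow the constructors shown in this diff):

    var schema = new StructType(new[]
    {
        new StructField("name", new StringType()),
        new StructField("age", new IntegerType())
    });

    // The overload added above maps each Row to its Values and pickles those.
    DataFrame people = session.CreateDataFrame(rowRdd, schema);
    people.Show();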
@ -3,6 +3,7 @@

 using System;
 using System.Collections.Generic;
+using System.Reflection;
 using Microsoft.Spark.CSharp.Core;
 using Microsoft.Spark.CSharp.Proxy;
 using Microsoft.Spark.CSharp.Services;

@ -150,6 +151,18 @@ namespace Microsoft.Spark.CSharp.Sql
             return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext);
         }

+        public DataFrame CreateDataFrame(RDD<Row> rdd, StructType schema)
+        {
+            // Note: This is for pickling RDD, convert to RDD<byte[]> which happens in CSharpWorker.
+            // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]].
+            // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside.
+            // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]].
+            var rddRow = rdd.Map(r => r);
+            rddRow.serializedMode = SerializedMode.Row;
+
+            return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext);
+        }
+
         /// <summary>
         /// Registers the given <see cref="DataFrame"/> as a temporary table in the catalog.
         /// Temporary tables exist only during the lifetime of this instance of SqlContext.

@ -527,6 +540,14 @@ namespace Microsoft.Spark.CSharp.Sql
             Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>(f).Execute;
             sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT)));
         }
+
+        public void RegisterFunction(string name, MethodInfo f)
+        {
+            logger.LogInfo("Name of the function to register {0}, method info", name, f.DeclaringType?.FullName + "." + f.Name);
+            var helper = new UdfReflectionHelper(f);
+            Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = helper.Execute;
+            sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType));
+        }
         #endregion
     }
 }
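A call sketch for the new reflection-based registration, assuming a public static method whose parameter and return types are supported by the UDF pipeline (type and method names are illustrative):

    using System.Reflection;

    public static class MyUdfs
    {
        public static int AddTen(int age) { return age + 10; }
    }

    // Register by MethodInfo instead of one of the typed Func<> overloads.
    MethodInfo method = typeof(MyUdfs).GetMethod("AddTen");
    sqlContext.RegisterFunction("AddTen", method);
    var result = sqlContext.Sql("SELECT AddTen(age) FROM people");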
@ -2,6 +2,7 @@
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.

 using System;
+using System.Collections;
 using System.Collections.Generic;
 using System.Linq;
 using System.Reflection;

@ -242,7 +243,7 @@ namespace Microsoft.Spark.CSharp.Sql
         /// <summary>
         /// Gets the regular expression that represents a fixed decimal.
         /// </summary>
-        public static Regex FixedDecimal = new Regex(@"decimal\((\d+),\s(\d+)\)");
+        public static Regex FixedDecimal = new Regex(@"decimal\s*\((\d+),\s*(\d+)\)");
         private int? precision, scale;
         /// <summary>
         /// Initializes a new instance of DecimalType from parameters specifying its precision and scale.

@ -257,7 +258,11 @@ namespace Microsoft.Spark.CSharp.Sql

         internal override object JsonValue
         {
-            get { throw new NotImplementedException(); }
+            get
+            {
+                if (precision == null && scale == null) return "decimal";
+                return "decimal(" + precision + "," + scale + ")";
+            }
         }

         /// <summary>

@ -268,7 +273,7 @@ namespace Microsoft.Spark.CSharp.Sql
         /// <exception cref="NotImplementedException">Not implemented yet.</exception>
         public DataType FromJson(JObject json)
         {
-            throw new NotImplementedException();
+            return ParseDataTypeFromJson(json);
         }
     }

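The widened pattern makes whitespace before the parenthesis legal and the space after the comma optional (the old `\s` required exactly one whitespace character). A quick sketch of what the change admits:

    using System;
    using System.Text.RegularExpressions;

    var fixedDecimal = new Regex(@"decimal\s*\((\d+),\s*(\d+)\)");
    Console.WriteLine(fixedDecimal.IsMatch("decimal(10, 2)"));  // true, matched before as well
    Console.WriteLine(fixedDecimal.IsMatch("decimal(10,2)"));   // true, previously rejected (no space after comma)
    Console.WriteLine(fixedDecimal.IsMatch("decimal (10, 2)")); // true, previously rejected (space before parenthesis)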
@ -454,6 +459,73 @@ namespace Microsoft.Spark.CSharp.Sql
         /// </summary>
         public List<StructField> Fields { get { return fields; } }

+        private Lazy<Func<dynamic, dynamic>[]> pickleConverters;
+
+        private Func<dynamic, dynamic>[] ConstructPickleConverters()
+        {
+            var funcs = new Func<dynamic, dynamic>[fields.Count];
+            int index = 0;
+            foreach (var field in fields)
+            {
+                if (field.DataType is StringType)
+                {
+                    funcs[index] = x => x?.ToString();
+                }
+                /*else if (field.DataType is LongType)
+                {
+                    funcs[index] = x => x == null ? null : (dynamic)(long)x;
+                }*/
+                /*else if (field.DataType is DateType)
+                {
+                    funcs[index] = x => x;
+                }*/
+                else if (field.DataType is ArrayType)
+                {
+                    Func<DataType, int, StructType> convertArrayTypeToStructTypeFunc = (dataType, length) =>
+                    {
+                        StructField[] f = new StructField[length];
+                        for (int i = 0; i < length; i++)
+                        {
+                            f[i] = new StructField(string.Format("_array_{0}", i), dataType);
+                        }
+                        return new StructType(f);
+                    };
+                    var elementType = (field.DataType as ArrayType).ElementType;
+                    funcs[index] = x =>
+                    {
+                        // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
+                        // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList;
+                        // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[].
+                        object[] valueOfArray = (x as ArrayList)?.ToArray() ?? x as object[];
+                        if (valueOfArray == null)
+                        {
+                            throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
+                        }
+
+                        return new RowImpl(valueOfArray,
+                            elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).Values; // TODO: this part may have some problems, not verified
+                    };
+                }
+                else if (field.DataType is MapType)
+                {
+                    //TODO
+                    throw new NotImplementedException();
+                }
+                else if (field.DataType is StructType)
+                {
+                    funcs[index] = x => x != null ? new RowImpl(x, field.DataType as StructType) : null;
+                }
+                else
+                {
+                    funcs[index] = x => x;
+                }
+                index++;
+            }
+            return funcs;
+        }
+
         internal IStructTypeProxy StructTypeProxy
         {
             get

@ -472,11 +544,13 @@ namespace Microsoft.Spark.CSharp.Sql
         public StructType(IEnumerable<StructField> fields)
         {
             this.fields = fields.ToList();
+            Initialize();
         }

         internal StructType(JObject json)
         {
             FromJson(json);
+            Initialize();
         }

         internal StructType(IStructTypeProxy structTypeProxy)

@ -484,6 +558,21 @@ namespace Microsoft.Spark.CSharp.Sql
             this.structTypeProxy = structTypeProxy;
             var jsonSchema = structTypeProxy.ToJson();
             FromJson(jsonSchema);
+            Initialize();
+        }
+
+        public void ConvertPickleObjects(dynamic[] input, dynamic[] output)
+        {
+            var c = pickleConverters.Value;
+            for (int i = 0; i < input.Length; ++i)
+            {
+                output[i] = c[i](input[i]);
+            }
+        }
+
+        private void Initialize()
+        {
+            pickleConverters = new Lazy<Func<dynamic, dynamic>[]>(ConstructPickleConverters);
         }

         /// <summary>
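A sketch of how the lazily built converters are meant to be driven: one converter per field, applied positionally to the raw values coming back from the Pyrolite unpickler (the schema and raw values here are illustrative):

    var schema = new StructType(new[]
    {
        new StructField("name", new StringType()),
        new StructField("age", new IntegerType())
    });

    dynamic[] raw = { "Alice", 42 };         // as delivered by the unpickler
    var converted = new dynamic[raw.Length];
    // First call forces ConstructPickleConverters(); later calls reuse the cached array.
    schema.ConvertPickleObjects(raw, converted);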
@ -5,6 +5,7 @@ using System;
 using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
+using System.Reflection;
 using System.Text;
 using System.Threading.Tasks;
 using Microsoft.Spark.CSharp.Core;

@ -249,6 +250,17 @@ namespace Microsoft.Spark.CSharp.Sql
             Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>(f).Execute;
             udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT)));
         }
+
+        public void RegisterFunction(string name, MethodInfo f)
+        {
+            if (!f.IsStatic)
+                throw new InvalidOperationException(f.DeclaringType?.FullName + "." + f.Name +
+                    " is not a static method, can't be registered");
+            logger.LogInfo("Name of the function to register {0}, method info", name, f.DeclaringType?.FullName + "." + f.Name);
+            var helper = new UdfReflectionHelper(f);
+            Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = helper.Execute;
+            udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType));
+        }
         #endregion
     }
 }
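The static-method guard exists because an instance method would need a target object, which cannot be shipped to the workers with the serialized command. A minimal sketch of what is accepted and rejected (type names are illustrative; `udfRegistration` stands in for however the UdfRegistration instance is obtained):

    public class Udfs
    {
        public static string Upper(string s) { return s == null ? null : s.ToUpper(); } // accepted
        public string Lower(string s) { return s == null ? null : s.ToLower(); }        // rejected: not static
    }

    udfRegistration.RegisterFunction("Upper", typeof(Udfs).GetMethod("Upper"));
    // Registering typeof(Udfs).GetMethod("Lower") would throw InvalidOperationException above.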
@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
 <packages>
-  <package id="log4net" version="2.0.5" targetFramework="net45" />
+  <package id="log4net" version="2.0.8" targetFramework="net45" />
-  <package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
+  <package id="Newtonsoft.Json" version="11.0.2" targetFramework="net45" />
   <package id="Razorvine.Pyrolite" version="4.10.0" targetFramework="net45" />
   <package id="Razorvine.Serpent" version="1.12.0" targetFramework="net45" />
 </packages>
@ -3513,7 +3513,7 @@
             Close the socket connections and releases all associated resources.
             </summary>
         </member>
-        <member name="M:Microsoft.Spark.CSharp.Network.DefaultSocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
+        <member name="M:Microsoft.Spark.CSharp.Network.DefaultSocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
             <summary>
             Establishes a connection to a remote host that is specified by an IP address and a port number
             </summary>

@ -3612,12 +3612,13 @@
             Close the ISocket connections and releases all associated resources.
             </summary>
         </member>
-        <member name="M:Microsoft.Spark.CSharp.Network.ISocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
+        <member name="M:Microsoft.Spark.CSharp.Network.ISocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
             <summary>
             Establishes a connection to a remote host that is specified by an IP address and a port number
             </summary>
             <param name="remoteaddr">The IP address of the remote host</param>
             <param name="port">The port number of the remote host</param>
+            <param name="secret">The secret to connect, can be null</param>
         </member>
         <member name="M:Microsoft.Spark.CSharp.Network.ISocketWrapper.GetStream">
             <summary>

@ -3770,7 +3771,7 @@
             Close the ISocket connections and releases all associated resources.
             </summary>
         </member>
-        <member name="M:Microsoft.Spark.CSharp.Network.RioSocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
+        <member name="M:Microsoft.Spark.CSharp.Network.RioSocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
             <summary>
             Establishes a connection to a remote host that is specified by an IP address and a port number
             </summary>

@ -3912,7 +3913,7 @@
             Close the ISocket connections and releases all associated resources.
             </summary>
         </member>
-        <member name="M:Microsoft.Spark.CSharp.Network.SaeaSocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
+        <member name="M:Microsoft.Spark.CSharp.Network.SaeaSocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
             <summary>
             Establishes a connection to a remote host that is specified by an IP address and a port number
             </summary>

@ -5190,12 +5191,13 @@
             </summary>
             <returns>row count</returns>
         </member>
-        <member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Show(System.Int32,System.Boolean)">
+        <member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Show(System.Int32,System.Int32,System.Boolean)">
             <summary>
             Displays rows of the DataFrame in tabular form
             </summary>
             <param name="numberOfRows">Number of rows to display - default 20</param>
             <param name="truncate">Indicates if strings more than 20 characters long will be truncated</param>
+            <param name="vertical">If set to True, print output rows vertically (one line per column value).</param>
         </member>
         <member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.ShowSchema">
             <summary>

@ -5627,10 +5629,11 @@
             the 100 new partitions will claim 10 of the current partitions.
             </summary>
         </member>
-        <member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Persist">
+        <member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Persist(Microsoft.Spark.CSharp.Core.StorageLevelType)">
             <summary>
             Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
             </summary>
+            <param name="type">Persist storage type</param>
         </member>
         <member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Unpersist(System.Boolean)">
             <summary>

@ -6040,6 +6043,15 @@
             DataFrame if no paths are passed in.
             </summary>
         </member>
+        <member name="M:Microsoft.Spark.CSharp.Sql.DataFrameReader.Avro(System.String)">
+            <summary>
+            Loads a AVRO file (one object per line) and returns the result as a DataFrame.
+
+            This function goes through the input once to determine the input schema. If you know the
+            schema in advance, use the version that specifies the schema to avoid the extra scan.
+            </summary>
+            <param name="path">input path</param>
+        </member>
         <member name="T:Microsoft.Spark.CSharp.Sql.DataFrameWriter">
             <summary>
             Interface used to write a DataFrame to external storage systems (e.g. file systems,

@ -6145,6 +6157,13 @@
             Format("parquet").Save(path)
             </summary>
         </member>
+        <member name="M:Microsoft.Spark.CSharp.Sql.DataFrameWriter.Avro(System.String)">
+            <summary>
+            Saves the content of the DataFrame in AVRO format at the specified path.
+            This is equivalent to:
+            Format("com.databricks.spark.avro").Save(path)
+            </summary>
+        </member>
         <member name="T:Microsoft.Spark.CSharp.Sql.Dataset">
             <summary>
             Dataset is a strongly typed collection of domain-specific objects that can be transformed
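A usage sketch for the two Avro entry points documented above, assuming the com.databricks.spark.avro package is available on the JVM side (paths are illustrative):

    // Read: scans the input once to infer the schema.
    DataFrame df = sqlContext.Read().Avro("hdfs:///data/events.avro");

    // Write: shorthand for Format("com.databricks.spark.avro").Save(path).
    df.Write().Avro("hdfs:///data/events-out");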
@ -6193,13 +6212,14 @@
             Returns all column names as an array.
             </summary>
         </member>
-        <member name="M:Microsoft.Spark.CSharp.Sql.Dataset.Show(System.Int32,System.Boolean)">
+        <member name="M:Microsoft.Spark.CSharp.Sql.Dataset.Show(System.Int32,System.Int32,System.Boolean)">
             <summary>
             Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters
             will be truncated, and all cells will be aligned right.
             </summary>
             <param name="numberOfRows">Number of rows - default is 20</param>
             <param name="truncate">Indicates if rows with more than 20 characters to be truncated</param>
+            <param name="vertical">If set to true, prints output rows vertically (one line per column value).</param>
         </member>
         <member name="M:Microsoft.Spark.CSharp.Sql.Dataset.ShowSchema">
             <summary>
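The new Show shape takes the truncation width as an int (a character count) rather than a bool, plus a vertical flag; judging from the updated tests later in this diff, the defaults appear to be (20, 20, false). A call sketch under that assumption:

    df.Show();              // 20 rows, cells truncated to 20 characters
    df.Show(50, 100);       // 50 rows, truncate to 100 characters
    df.Show(20, 20, true);  // vertical: one line per column value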
Some file diffs are not shown because one or more lines are too long.
@ -33,7 +33,7 @@ namespace AdapterTest
             // get accumulator server port and connect to accumuator server
             int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort;
             sock = SocketFactory.CreateSocket();
-            sock.Connect(IPAddress.Loopback, serverPort);
+            sock.Connect(IPAddress.Loopback, serverPort, null);
         }

         [TearDown]
@ -35,22 +35,25 @@
     <WarningLevel>4</WarningLevel>
   </PropertyGroup>
   <ItemGroup>
+    <Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
+      <HintPath>..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
+    </Reference>
     <Reference Include="Microsoft.CSharp" />
     <Reference Include="Moq, Version=4.2.1510.2205, Culture=neutral, PublicKeyToken=69f491c39445e920, processorArchitecture=MSIL">
       <HintPath>..\packages\Moq.4.2.1510.2205\lib\net40\Moq.dll</HintPath>
       <Private>True</Private>
     </Reference>
-    <Reference Include="Newtonsoft.Json">
+    <Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
-      <HintPath>..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
+      <HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
     </Reference>
     <Reference Include="nunit.framework, Version=3.0.5813.39031, Culture=neutral, PublicKeyToken=2638cd05610744eb, processorArchitecture=MSIL">
       <HintPath>..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
       <Private>True</Private>
     </Reference>
-    <Reference Include="Razorvine.Pyrolite">
+    <Reference Include="Razorvine.Pyrolite, Version=4.10.0.26455, Culture=neutral, processorArchitecture=MSIL">
       <HintPath>..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
     </Reference>
-    <Reference Include="Razorvine.Serpent">
+    <Reference Include="Razorvine.Serpent, Version=1.12.0.35091, Culture=neutral, processorArchitecture=MSIL">
       <HintPath>..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll</HintPath>
     </Reference>
     <Reference Include="System" />
@ -12,6 +12,7 @@ using Microsoft.Spark.CSharp.Sql;
 using Microsoft.Spark.CSharp.Proxy;
 using NUnit.Framework;
 using Moq;
+using Microsoft.Spark.CSharp.Network;

 namespace AdapterTest
 {

@ -65,10 +66,10 @@ namespace AdapterTest
         [Test]
         public void TestShow()
         {
-            mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
+            mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
             var dataFrame = new DataFrame(mockDataFrameProxy.Object, null);
             dataFrame.Show();
-            mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once);
+            mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once);
         }

         [Test]

@ -135,9 +136,9 @@ namespace AdapterTest
             var expectedRows = new Row[] {new MockRow(), new MockRow()};
             var mockRddProxy = new Mock<IRDDProxy>();
             var mockRddCollector = new Mock<IRDDCollector>();
-            mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
+            mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
                 .Returns(expectedRows);
-            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
+            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123,null));
             mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
             mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
             var dataFrame = new DataFrame(mockDataFrameProxy.Object, null);

@ -838,9 +839,9 @@ namespace AdapterTest
             var expectedRows = new Row[] {new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow()};
             var mockRddProxy = new Mock<IRDDProxy>();
             var mockRddCollector = new Mock<IRDDCollector>();
-            mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
+            mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
                 .Returns(expectedRows);
-            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
+            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null));
             mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
             mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
             mockDataFrameProxy.Setup(m => m.Limit(It.IsAny<int>())).Returns(mockDataFrameProxy.Object);

@ -868,9 +869,9 @@ namespace AdapterTest
             var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() };
             var mockRddProxy = new Mock<IRDDProxy>();
             var mockRddCollector = new Mock<IRDDCollector>();
-            mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
+            mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
                 .Returns(expectedRows);
-            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
+            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null));
             mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
             mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
             mockDataFrameProxy.Setup(m => m.Limit(It.IsAny<int>())).Returns(mockDataFrameProxy.Object);

@ -892,9 +893,9 @@ namespace AdapterTest
             var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() };
             var mockRddProxy = new Mock<IRDDProxy>();
             var mockRddCollector = new Mock<IRDDCollector>();
-            mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
+            mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
                 .Returns(expectedRows);
-            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
+            mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null));
             mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
             mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
             mockDataFrameProxy.Setup(m => m.Limit(It.IsAny<int>())).Returns(mockDataFrameProxy.Object);
@ -38,12 +38,12 @@ namespace AdapterTest
         public void TestShow()
         {
             Mock<IDataFrameProxy> mockDataFrameProxy = new Mock<IDataFrameProxy>();
-            mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
+            mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
             mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object);

             var dataset = new Dataset(mockDatasetProxy.Object);
             dataset.Show();
-            mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once);
+            mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once);
         }

         [Test]
@ -9,6 +9,7 @@ using System.Threading.Tasks;
 using System.Net;
 using System.Net.Sockets;
 using System.IO;
+using Microsoft.Spark.CSharp.Core;
 using Microsoft.Spark.CSharp.Sql;
 using Razorvine.Pickle;
 using Microsoft.Spark.CSharp.Proxy;

@ -64,7 +65,7 @@ namespace AdapterTest.Mocks
             throw new NotImplementedException();
         }

-        public string GetShowString(int numberOfRows, bool truncate)
+        public string GetShowString(int numberOfRows, int truncate, bool vertical)
         {
             throw new NotImplementedException();
         }

@ -236,6 +237,11 @@ namespace AdapterTest.Mocks
         }

         public IDataFrameProxy Sample(bool withReplacement, double fraction, long seed)
+        {
+            throw new NotImplementedException();
+        }
+
+        public IDataFrameProxy Broadcast()
         {
             throw new NotImplementedException();
         }
@ -4,12 +4,13 @@ using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using Microsoft.Spark.CSharp.Core;
+using Microsoft.Spark.CSharp.Network;

 namespace AdapterTest.Mocks
 {
     class MockRDDCollector : IRDDCollector
     {
-        public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
+        public IEnumerable<dynamic> Collect(SocketInfo port, SerializedMode serializedMode, Type type)
         {
             throw new NotImplementedException();
         }
@ -15,6 +15,7 @@ using Microsoft.Spark.CSharp.Core;
 using Microsoft.Spark.CSharp.Proxy;
 using Microsoft.Spark.CSharp.Interop.Ipc;
 using NUnit.Framework;
+using Microsoft.Spark.CSharp.Network;

 namespace AdapterTest.Mocks
 {

@ -60,7 +61,7 @@ namespace AdapterTest.Mocks
             return union;
         }

-        public int CollectAndServe()
+        public SocketInfo CollectAndServe()
         {
             return MockSparkContextProxy.RunJob(this);
         }
@ -8,6 +8,13 @@ namespace AdapterTest.Mocks
 {
     public class MockRow : Row
     {
+        public override dynamic[] Values
+        {
+            get
+            {
+                throw new NotImplementedException();
+            }
+        }
+
         public override int Size()
         {
@ -195,7 +195,7 @@ namespace AdapterTest.Mocks
             throw new NotImplementedException();
         }

-        internal static int RunJob(IRDDProxy rdd)
+        internal static SocketInfo RunJob(IRDDProxy rdd)
         {
             var mockRdd = (rdd as MockRddProxy);
             IEnumerable<byte[]> result = mockRdd.pickle ? mockRdd.result.Cast<byte[]>() :

@ -222,10 +222,12 @@ namespace AdapterTest.Mocks
                     ns.Flush();
                 }
             });
-            return (listener.LocalEndPoint as IPEndPoint).Port;
+
+            SocketInfo socketInfo = new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null);
+            return socketInfo;
         }

-        public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
+        public SocketInfo RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
         {
             return RunJob(rdd);
         }
@ -86,9 +86,9 @@ namespace AdapterTest
             Assert.Throws<InvalidOperationException>(() => clientSock.GetStream());
             Assert.Throws<InvalidOperationException>(() => clientSock.Receive());
             Assert.Throws<InvalidOperationException>(() => clientSock.Send(null));
-            Assert.Throws<SocketException>(() => clientSock.Connect(IPAddress.Any, 1024));
+            Assert.Throws<SocketException>(() => clientSock.Connect(IPAddress.Any, 1024, null));

-            clientSock.Connect(IPAddress.Loopback, port);
+            clientSock.Connect(IPAddress.Loopback, port, null);

             // Valid invalid operation
             var byteBuf = ByteBufPool.Default.Allocate();

@ -80,7 +80,7 @@ namespace AdapterTest
                     ns.Flush();
                 }
             });
-            return (listener.LocalEndPoint as IPEndPoint).Port;
+            return new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null);
         });
         _mockRddProxy.Setup(m => m.RDDCollector).Returns(new RDDCollector());
@ -1,10 +1,11 @@
 <?xml version="1.0" encoding="utf-8"?>
 <packages>
+  <package id="log4net" version="2.0.8" targetFramework="net45" />
   <package id="Moq" version="4.2.1510.2205" targetFramework="net45" />
-  <package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
+  <package id="Newtonsoft.Json" version="11.0.2" targetFramework="net45" />
   <package id="NUnit" version="3.0.1" targetFramework="net45" />
   <package id="NUnit.Console" version="3.0.1" developmentDependency="true" />
+  <package id="OpenCover" version="4.6.166" targetFramework="net45" developmentDependency="true" />
   <package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
   <package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
-  <package id="OpenCover" version="4.6.166" targetFramework="net45" developmentDependency="true" />
 </packages>
@ -34,6 +34,9 @@
     <Prefer32Bit>false</Prefer32Bit>
   </PropertyGroup>
   <ItemGroup>
+    <Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
+      <HintPath>..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
+    </Reference>
     <Reference Include="Microsoft.CodeAnalysis, Version=1.2.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
       <SpecificVersion>False</SpecificVersion>
       <HintPath>..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.dll</HintPath>

@ -50,11 +53,13 @@
       <SpecificVersion>False</SpecificVersion>
       <HintPath>..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.Scripting.dll</HintPath>
     </Reference>
+    <Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
+      <HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
+    </Reference>
     <Reference Include="Razorvine.Pyrolite, Version=4.10.0.26455, Culture=neutral, processorArchitecture=MSIL">
-      <SpecificVersion>False</SpecificVersion>
       <HintPath>..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
     </Reference>
-    <Reference Include="Razorvine.Serpent">
+    <Reference Include="Razorvine.Serpent, Version=1.12.0.35091, Culture=neutral, processorArchitecture=MSIL">
       <HintPath>..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll</HintPath>
     </Reference>
     <Reference Include="System" />
@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <packages>
-  <package id="log4net" version="2.0.5" targetFramework="net45" />
+  <package id="log4net" version="2.0.8" targetFramework="net461" />
   <package id="Microsoft.Net.Compilers" version="1.1.1" targetFramework="net45" />
-  <package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
+  <package id="Newtonsoft.Json" version="11.0.2" targetFramework="net461" />
-  <package id="Razorvine.Pyrolite" version="4.10.0" targetFramework="net45" />
+  <package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net461" />
-  <package id="Razorvine.Serpent" version="1.12.0" targetFramework="net45" />
+  <package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net461" />
 </packages>
@ -1867,5 +1867,72 @@ namespace Microsoft.Spark.CSharp.Samples
             SparkCLRSamples.FileSystemHelper.DeleteDirectory(path, true);
             Console.WriteLine("Remove directory: {0}", path);
         }
+
+        /// <summary>
+        /// Single UDF Sample
+        /// </summary>
+        [Sample]
+        internal static void SingleUDFSample()
+        {
+            var sqlContext = GetSqlContext();
+            var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson));
+            peopleDataFrame.RegisterTempTable("peopleDataFrame");
+
+            sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; });
+
+            var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, 20) > 60");
+
+            rowSet.Show();
+
+            if (SparkCLRSamples.Configuration.IsValidationEnabled)
+            {
+                Assert.AreEqual(rowSet.Count(), 2);
+            }
+        }
+
+        /// <summary>
+        /// Single UDF Sample with duplicate values
+        /// </summary>
+        [Sample]
+        internal static void SingleUDFWithDupSample()
+        {
+            var sqlContext = GetSqlContext();
+            var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson));
+            peopleDataFrame.RegisterTempTable("peopleDataFrame");
+
+            sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; });
+
+            var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, age) < 50");
+
+            rowSet.Show();
+
+            if (SparkCLRSamples.Configuration.IsValidationEnabled)
+            {
+                Assert.AreEqual(rowSet.Count(), 1);
+            }
+        }
+
+        /// <summary>
+        /// Multiple UDFs sample
+        /// </summary>
+        [Sample]
+        internal static void MultipleUDFSample()
+        {
+            var sqlContext = GetSqlContext();
+            var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson));
+            peopleDataFrame.RegisterTempTable("peopleDataFrame");
+
+            sqlContext.RegisterFunction("UDF1", (int x, int y) => { return x + y; });
+            sqlContext.RegisterFunction("UDF2", (string name, string id) => { return name + ":" + id; });
+
+            var rowSet = sqlContext.Sql("SELECT id, name, UDF1(age, 20) AS UDF1, UDF2(name, id) AS UDF2 FROM peopleDataFrame where UDF1(age, 20) > 60");
+
+            rowSet.Show();
+
+            if (SparkCLRSamples.Configuration.IsValidationEnabled)
+            {
+                Assert.AreEqual(rowSet.Count(), 2);
+            }
+        }
     }
 }
@ -66,8 +66,10 @@ namespace Microsoft.Spark.CSharp.Samples

             if (Configuration.IsValidationEnabled && !status)
             {
-                Environment.Exit(1);
+                Environment.Exit(2);
             }
+
+            Environment.Exit(1);
         }

         // Creates and returns a context
@ -33,9 +33,11 @@
     <WarningLevel>4</WarningLevel>
   </PropertyGroup>
   <ItemGroup>
-    <Reference Include="Newtonsoft.Json, Version=7.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
-      <HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
-      <Private>True</Private>
+    <Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
+      <HintPath>..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
+    </Reference>
+    <Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
+      <HintPath>..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
     </Reference>
     <Reference Include="nunit.framework, Version=3.0.5813.39031, Culture=neutral, PublicKeyToken=2638cd05610744eb, processorArchitecture=MSIL">
       <HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>

@ -1,5 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <packages>
-  <package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
+  <package id="log4net" version="2.0.8" targetFramework="net45" />
+  <package id="Newtonsoft.Json" version="11.0.2" targetFramework="net45" />
   <package id="NUnit" version="3.0.1" targetFramework="net45" />
 </packages>
@ -36,11 +36,10 @@
     <WarningLevel>4</WarningLevel>
   </PropertyGroup>
   <ItemGroup>
-    <Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
-      <SpecificVersion>False</SpecificVersion>
-      <HintPath>..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
-    </Reference>
     <Reference Include="Microsoft.CSharp" />
+    <Reference Include="Newtonsoft.Json">
+      <HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
+    </Reference>
     <Reference Include="Razorvine.Pyrolite">
       <HintPath>..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
     </Reference>
@ -0,0 +1,57 @@
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Microsoft.Spark.CSharp.Interop.Ipc;
+using Microsoft.Spark.CSharp.Proxy.Ipc;
+
+namespace Microsoft.Spark.CSharp.Utils.FileSystem
+{
+    /// <summary>
+    /// See https://hadoop.apache.org/docs/r2.6.1/api/org/apache/hadoop/fs/FileStatus.html
+    /// </summary>
+    public class HdfsFileStatus
+    {
+        public long Length => _status.Value.Length;
+        public long ModificationTime => _status.Value.Time;
+        public string Owner => _status.Value.Owner;
+        public string Path => _status.Value.Path;
+        public bool IsFile => _status.Value.IsFile;
+        public bool IsDirectory => _status.Value.IsDirectory;
+        public bool IsSymlink => _status.Value.IsSymlink;
+
+        private Lazy<Status> _status;
+
+        internal HdfsFileStatus(JvmObjectReference obj)
+        {
+            _status = new Lazy<Status>(() => new Status(obj));
+        }
+
+        private class Status
+        {
+            public long Length;
+            public long Time;
+            public string Owner;
+            public string Path;
+            public bool IsFile;
+            public bool IsDirectory;
+            public bool IsSymlink;
+
+            public Status(JvmObjectReference obj)
+            {
+                Length = (long)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getLen");
+                Time = (long)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getModificationTime");
+                Owner = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getOwner");
+                IsFile = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isFile");
+                IsDirectory = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isDirectory");
+                IsSymlink = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isSymlink");
+                var pr = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getPath"));
+                Path = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pr, "getName");
+            }
+        }
+    }
+}
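A consumption sketch for the lazily populated status objects, paired with the ListStatus helper added further down (construction of the helper and the path are illustrative):

    var hdfs = new HdfsFileSystemHelper();
    foreach (HdfsFileStatus status in hdfs.ListStatus("/tmp/mobius"))
    {
        // Property access triggers the JVM round-trips once, on first use.
        Console.WriteLine("{0}\t{1} bytes\towner={2}\tdir={3}",
            status.Path, status.Length, status.Owner, status.IsDirectory);
    }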
@ -4,8 +4,11 @@
 using System;
 using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using Microsoft.Spark.CSharp.Interop;
 using Microsoft.Spark.CSharp.Interop.Ipc;
 using Microsoft.Spark.CSharp.Proxy.Ipc;
+using Microsoft.Spark.CSharp.Utils.FileSystem;

 namespace Microsoft.Spark.CSharp.Utils
 {

@ -45,6 +48,15 @@ namespace Microsoft.Spark.CSharp.Utils
             return files;
         }

+        /// <summary>
+        /// List the names of all the files under the given path.
+        /// </summary>
+        public IEnumerable<HdfsFileStatus> ListStatus(string path)
+        {
+            var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
+            return ((List<JvmObjectReference>)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "listStatus", pathJvmReference)).Select(r => new HdfsFileStatus(r));
+        }
+
         /// <summary>
         /// Build a temp file path under '/tmp' path on HDFS.
         /// </summary>

@ -91,5 +103,37 @@ namespace Microsoft.Spark.CSharp.Utils
             var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
             return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "delete", pathJvmReference, recursive);
         }
+
+        public bool IsFile(string path)
+        {
+            var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
+            return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isFile", pathJvmReference);
+        }
+
+        public bool IsDirectory(string path)
+        {
+            var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
+            return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isDirectory", pathJvmReference);
+        }
+
+        public bool Touch(string path)
+        {
+            var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
+            return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "createNewFile", pathJvmReference);
+        }
+
+        public void CopyFromLocalFile(string src, string dest)
+        {
+            var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", new Uri(src).AbsoluteUri);
+            var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", dest);
+            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyFromLocalFile", from, to);
+        }
+
+        public void CopyToLocalFile(string src, string dest)
+        {
+            var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", new Uri(dest).AbsoluteUri);
+            var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", src);
+            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyToLocalFile", from, to);
+        }
     }
 }
|
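
With these additions, HdfsFileSystemHelper covers the common FileSystem operations end to end. A minimal usage sketch, assuming the helper is constructed the same way as elsewhere in this codebase (the `hdfs` variable and paths are illustrative, not from the diff):

    var hdfs = new HdfsFileSystemHelper();
    hdfs.Touch("/tmp/marker");                                   // createNewFile on the JVM side
    if (hdfs.IsFile("/tmp/marker") && !hdfs.IsDirectory("/tmp/marker"))
    {
        hdfs.CopyToLocalFile("/tmp/marker", @"C:\temp\marker");  // local path is URI-normalized
    }
    foreach (var status in hdfs.ListStatus("/tmp"))
    {
        Console.WriteLine("{0} (file: {1})", status.Path, status.IsFile);
    }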
@@ -40,6 +40,7 @@
     <Reference Include="Microsoft.CSharp" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="FileSystem\HdfsFileStatus.cs" />
     <Compile Include="FileSystem\LocalFileSystemHelper.cs" />
     <Compile Include="FileSystem\HdfsFileSystemHelper.cs" />
     <Compile Include="FileSystem\FileSystemHelper.cs" />
@@ -111,6 +111,7 @@ namespace Microsoft.Spark.CSharp
 
             bool sparkReuseWorker = false;
             string envVar = Environment.GetEnvironmentVariable("SPARK_REUSE_WORKER"); // this envVar is set in JVM side
+            var secret = Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET");
             if ((envVar != null) && envVar.Equals("1"))
             {
                 sparkReuseWorker = true;
@@ -130,7 +131,7 @@ namespace Microsoft.Spark.CSharp
                 SerDe.Write(s, trId); // write taskRunnerId to JVM side
                 s.Flush();
             }
-            TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker);
+            TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker, secret);
             waitingTaskRunners.Add(taskRunner);
             taskRunnerRegistry[trId] = taskRunner;
             trId++;
@@ -3,7 +3,9 @@
 
 using System;
 using System.IO;
+using System.Net;
 using System.Runtime.CompilerServices;
+using System.Text;
 using System.Threading;
 using Microsoft.Spark.CSharp.Configuration;
 using Microsoft.Spark.CSharp.Interop.Ipc;
@@ -33,17 +35,19 @@ namespace Microsoft.Spark.CSharp
         private readonly ISocketWrapper socket; // Socket to communicate with JVM
         private volatile bool stop;
         private readonly bool socketReuse; // whether the socket can be reused to run multiple Spark tasks
+        private string secret;
 
         /// <summary>
         /// Task runner Id
         /// </summary>
         public int TaskId { get; private set; }
 
-        public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse)
+        public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse, string secret)
         {
             TaskId = trId;
             this.socket = socket;
             this.socketReuse = socketReuse;
+            this.secret = secret;
         }
 
         public void Run()
@@ -57,6 +61,14 @@ namespace Microsoft.Spark.CSharp
                 using (var inputStream = socket.GetInputStream())
                 using (var outputStream = socket.GetOutputStream())
                 {
+                    if (!string.IsNullOrEmpty(secret))
+                    {
+                        SerDe.Write(outputStream, secret);
+                        outputStream.Flush();
+                        var reply = SerDe.ReadString(inputStream);
+                        Logger.LogDebug("Connect back to JVM: " + reply);
+                        secret = null;
+                    }
                     byte[] bytes = SerDe.ReadBytes(inputStream, sizeof(int));
                     if (bytes != null)
                     {
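
This block is the worker side of the Spark 2.3-style socket authentication: the task runner writes the shared secret before any task data and expects a short acknowledgement back. A rough sketch of the matching server-side check, assuming the same SerDe length-prefixed string framing (the method name and reply value are illustrative, not taken from the Spark source):

    static bool AuthenticateWorker(Stream input, Stream output, string expectedSecret)
    {
        var received = SerDe.ReadString(input);  // secret arrives before any task data
        if (received != expectedSecret)
        {
            return false;                        // caller should close the socket
        }
        SerDe.Write(output, "ok");               // this reply is what TaskRunner logs
        output.Flush();
        return true;
    }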
@@ -0,0 +1,391 @@
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using Microsoft.Spark.CSharp.Core;
+using Microsoft.Spark.CSharp.Interop.Ipc;
+using Microsoft.Spark.CSharp.Services;
+using Microsoft.Spark.CSharp.Sql;
+using Razorvine.Pickle;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Runtime.Serialization;
+using System.Runtime.Serialization.Formatters.Binary;
+
+namespace Microsoft.Spark.CSharp
+{
+    /// <summary>
+    /// This class executes user-defined methods.
+    /// </summary>
+    internal class UDFCommand
+    {
+        private readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
+        private ILoggerService logger;
+        private Stream inputStream;
+        private Stream outputStream;
+        private int splitIndex;
+        private DateTime bootTime;
+        private string deserializerMode;
+        private string serializerMode;
+        private IFormatter formatter;
+        private Stopwatch commandProcessWatch;
+        private int isSqlUdf;
+        private List<WorkerFunc> workerFuncList;
+        private int stageId;
+
+        public UDFCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime,
+            string deserializerMode, string serializerMode, IFormatter formatter,
+            Stopwatch commandProcessWatch, int isSqlUdf, List<WorkerFunc> workerFuncList, int stageId)
+        {
+            this.inputStream = inputStream;
+            this.outputStream = outputStream;
+            this.splitIndex = splitIndex;
+            this.bootTime = bootTime;
+            this.deserializerMode = deserializerMode;
+            this.serializerMode = serializerMode;
+            this.formatter = formatter;
+            this.commandProcessWatch = commandProcessWatch;
+            this.isSqlUdf = isSqlUdf;
+            this.workerFuncList = workerFuncList;
+            this.stageId = stageId;
+
+            InitializeLogger();
+        }
+
+        private void InitializeLogger()
+        {
+            try
+            {
+                // if there exists exe.config file, then use log4net
+                if (File.Exists(AppDomain.CurrentDomain.SetupInformation.ConfigurationFile))
+                {
+                    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance);
+                }
+
+                logger = LoggerServiceFactory.GetLogger(typeof(UDFCommand));
+            }
+            catch (Exception e)
+            {
+                Console.WriteLine("InitializeLogger exception {0}, will exit", e);
+                Environment.Exit(-1);
+            }
+        }
+
+        internal void Execute()
+        {
+            if (isSqlUdf == 0)
+            {
+                ExecuteNonSqlUDF();
+            }
+            else
+            {
+                ExecuteSqlUDF();
+            }
+        }
+
+        private void ExecuteNonSqlUDF()
+        {
+            int count = 0;
+            int nullMessageCount = 0;
+            logger.LogDebug("Beginning to execute non sql func");
+            WorkerFunc workerFunc = workerFuncList[0];
+            var func = workerFunc.CharpWorkerFunc.Func;
+
+            var funcProcessWatch = Stopwatch.StartNew();
+            DateTime initTime = DateTime.UtcNow;
+            foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf)))
+            {
+                funcProcessWatch.Stop();
+
+                if (object.ReferenceEquals(null, message))
+                {
+                    nullMessageCount++;
+                    continue;
+                }
+
+                try
+                {
+                    WriteOutput(outputStream, serializerMode, message, formatter);
+                }
+                catch (Exception ex)
+                {
+                    logger.LogError("WriteOutput() failed at iteration {0}, exception {1}", count, ex);
+                    throw;
+                }
+
+                count++;
+                funcProcessWatch.Start();
+            }
+
+            logger.LogInfo("Output entries count: " + count);
+            logger.LogDebug("Null messages count: " + nullMessageCount);
+
+            WriteDiagnosticsInfo(outputStream, bootTime, initTime);
+
+            commandProcessWatch.Stop();
+
+            // log statistics
+            logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds);
+            logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds);
+        }
+
+        private void ExecuteSqlUDF()
+        {
+            int count = 0;
+            int nullMessageCount = 0;
+            logger.LogDebug("Beginning to execute sql func");
+
+            var funcProcessWatch = Stopwatch.StartNew();
+            DateTime initTime = DateTime.UtcNow;
+
+            foreach (var row in GetIterator(inputStream, deserializerMode, isSqlUdf))
+            {
+                List<Object> messages = new List<Object>();
+
+                foreach (WorkerFunc workerFunc in workerFuncList)
+                {
+                    List<Object> args = new List<Object>();
+                    foreach (int offset in workerFunc.ArgOffsets)
+                    {
+                        args.Add(row[offset]);
+                    }
+
+                    foreach (var message in workerFunc.CharpWorkerFunc.Func(splitIndex, new[] { args.ToArray() }))
+                    {
+                        funcProcessWatch.Stop();
+
+                        if (object.ReferenceEquals(null, message))
+                        {
+                            nullMessageCount++;
+                            continue;
+                        }
+
+                        messages.Add(message);
+                    }
+                }
+
+                try
+                {
+                    dynamic res = messages.ToArray();
+                    if (messages.Count == 1)
+                    {
+                        res = messages[0];
+                    }
+
+                    WriteOutput(outputStream, serializerMode, res, formatter);
+                }
+                catch (Exception ex)
+                {
+                    logger.LogError("WriteOutput() failed at iteration {0}, exception {1}", count, ex.Message);
+                    throw;
+                }
+
+                count++;
+                funcProcessWatch.Start();
+            }
+
+            logger.LogInfo("Output entries count: " + count);
+            logger.LogDebug("Null messages count: " + nullMessageCount);
+
+            WriteDiagnosticsInfo(outputStream, bootTime, initTime);
+
+            commandProcessWatch.Stop();
+
+            // log statistics
+            logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds);
+            logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds);
+        }
+
+        private void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter)
+        {
+            var buffer = GetSerializedMessage(serializerMode, message, formatter);
+            if (buffer == null)
+            {
+                logger.LogError("Buffer is null");
+            }
+
+            if (buffer.Length <= 0)
+            {
+                logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length);
+            }
+
+            SerDe.Write(networkStream, buffer.Length);
+            SerDe.Write(networkStream, buffer);
+        }
+
+        private byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter)
+        {
+            byte[] buffer;
+
+            switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode))
+            {
+                case SerializedMode.None:
+                    buffer = message as byte[];
+                    break;
+
+                case SerializedMode.String:
+                    buffer = SerDe.ToBytes(message as string);
+                    break;
+
+                case SerializedMode.Row:
+                    var pickler = new Pickler();
+                    buffer = pickler.dumps(new ArrayList { message });
+                    break;
+
+                default:
+                    try
+                    {
+                        var ms = new MemoryStream();
+                        formatter.Serialize(ms, message);
+                        buffer = ms.ToArray();
+                    }
+                    catch (Exception ex)
+                    {
+                        logger.LogError("Exception serializing output: " + ex);
+                        logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName);
+                        throw;
+                    }
+                    break;
+            }
+
+            return buffer;
+        }
+
+        private void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime)
+        {
+            DateTime finishTime = DateTime.UtcNow;
+            const string format = "MM/dd/yyyy hh:mm:ss.fff tt";
+
+            logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}",
+                bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format));
+
+            SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA);
+            SerDe.Write(networkStream, ToUnixTime(bootTime));
+            SerDe.Write(networkStream, ToUnixTime(initTime));
+            SerDe.Write(networkStream, ToUnixTime(finishTime));
+
+            SerDe.Write(networkStream, 0L); //shuffle.MemoryBytesSpilled
+            SerDe.Write(networkStream, 0L); //shuffle.DiskBytesSpilled
+        }
+
+        private long ToUnixTime(DateTime dt)
+        {
+            return (long)(dt - UnixTimeEpoch).TotalMilliseconds;
+        }
+
+        private IEnumerable<dynamic> GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf)
+        {
+            logger.LogInfo("Serialized mode in GetIterator: " + serializedMode);
+            IFormatter formatter = new BinaryFormatter();
+            var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode);
+            int messageLength;
+            Stopwatch watch = Stopwatch.StartNew();
+            Row tempRow = null;
+
+            while ((messageLength = SerDe.ReadInt(inputStream)) != (int)SpecialLengths.END_OF_DATA_SECTION)
+            {
+                watch.Stop();
+                if (messageLength > 0 || messageLength == (int)SpecialLengths.NULL)
+                {
+                    watch.Start();
+                    byte[] buffer = messageLength > 0 ? SerDe.ReadBytes(inputStream, messageLength) : null;
+                    watch.Stop();
+                    switch (mode)
+                    {
+                        case SerializedMode.String:
+                        {
+                            if (messageLength > 0)
+                            {
+                                if (buffer == null)
+                                {
+                                    logger.LogDebug("Buffer is null. Message length is {0}", messageLength);
+                                }
+                                yield return SerDe.ToString(buffer);
+                            }
+                            else
+                            {
+                                yield return null;
+                            }
+                            break;
+                        }
+
+                        case SerializedMode.Row:
+                        {
+                            Debug.Assert(messageLength > 0);
+                            var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer);
+
+                            if (isFuncSqlUdf == 0)
+                            {
+                                foreach (var row in unpickledObjects.Select(item => (item as RowConstructor).GetRow()))
+                                {
+                                    yield return row;
+                                }
+                            }
+                            else
+                            {
+                                foreach (var row in unpickledObjects)
+                                {
+                                    yield return row;
+                                }
+                            }
+
+                            break;
+                        }
+
+                        case SerializedMode.Pair:
+                        {
+                            byte[] pairKey = buffer;
+                            byte[] pairValue;
+
+                            watch.Start();
+                            int valueLength = SerDe.ReadInt(inputStream);
+                            if (valueLength > 0)
+                            {
+                                pairValue = SerDe.ReadBytes(inputStream, valueLength);
+                            }
+                            else if (valueLength == (int)SpecialLengths.NULL)
+                            {
+                                pairValue = null;
+                            }
+                            else
+                            {
+                                throw new Exception(string.Format("unexpected valueLength: {0}", valueLength));
+                            }
+                            watch.Stop();
+
+                            yield return new Tuple<byte[], byte[]>(pairKey, pairValue);
+                            break;
+                        }
+
+                        case SerializedMode.None: //just return raw bytes
+                        {
+                            yield return buffer;
+                            break;
+                        }
+
+                        default:
+                        {
+                            if (buffer != null)
+                            {
+                                var ms = new MemoryStream(buffer);
+                                yield return formatter.Deserialize(ms);
+                            }
+                            else
+                            {
+                                yield return null;
+                            }
+                            break;
+                        }
+                    }
+                }
+                watch.Start();
+            }
+
+            logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds);
+        }
+    }
+}
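
UDFCommand now owns the per-task read/execute/write loop that previously lived in Worker. A hypothetical wiring for a single non-SQL function, assuming CSharpWorkerFunc wraps a Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> delegate (its exact constructor is not shown in this diff; the streams are placeholders):

    var workerFuncs = new List<WorkerFunc>
    {
        // identity function; argCount/argOffsets are unused on the non-SQL path
        new WorkerFunc(new CSharpWorkerFunc((split, input) => input), 0, null)
    };
    var command = new UDFCommand(inputStream, outputStream, splitIndex: 0,
        bootTime: DateTime.UtcNow, deserializerMode: "None", serializerMode: "None",
        formatter: new BinaryFormatter(), commandProcessWatch: Stopwatch.StartNew(),
        isSqlUdf: 0, workerFuncList: workerFuncs, stageId: -1);
    command.Execute(); // dispatches to ExecuteNonSqlUDF() because isSqlUdf == 0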
@@ -2,7 +2,6 @@
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 
 using System;
-using System.Collections;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.IO;
@@ -17,8 +16,6 @@ using Microsoft.Spark.CSharp.Core;
 using Microsoft.Spark.CSharp.Interop.Ipc;
 using Microsoft.Spark.CSharp.Network;
 using Microsoft.Spark.CSharp.Services;
-using Microsoft.Spark.CSharp.Sql;
-using Razorvine.Pickle;
 
 namespace Microsoft.Spark.CSharp
 {
@@ -31,7 +28,6 @@ namespace Microsoft.Spark.CSharp
     /// </summary>
     public class Worker
     {
-        private static readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
         private static ILoggerService logger;
         private static SparkCLRAssemblyHandler assemblyHandler;
 
@@ -81,11 +77,13 @@ namespace Microsoft.Spark.CSharp
                 InitializeLogger();
                 logger.LogInfo("RunSimpleWorker ...");
                 PrintFiles();
-                int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM
-                logger.LogDebug("Port number used to pipe in/out data between JVM and CLR {0}", javaPort);
+                //int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM
+                var javaPort = int.Parse(Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT"));
+                var secret = Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET");
+                logger.LogDebug("Port and secret number used to pipe in/out data between JVM and CLR {0} {1}", javaPort, secret);
                 var socket = InitializeSocket(javaPort);
-                TaskRunner taskRunner = new TaskRunner(0, socket, false);
+                //Microsoft.Spark.CSharp.Network.Utils.DoServerAuth(socket, secret);
+                TaskRunner taskRunner = new TaskRunner(0, socket, false, secret);
                 taskRunner.Run();
             }
             catch (Exception e)
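
Instead of reading the port from stdin, the worker now picks it up from the environment, matching what the Spark 2.3 PythonWorkerFactory exports. For local testing, the JVM side can be mimicked by setting the two variables before the worker connects (values here are illustrative), which is exactly what the updated tests later in this change do:

    Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", "8765");
    Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET", "0123abcd");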
@@ -119,7 +117,7 @@ namespace Microsoft.Spark.CSharp
         private static ISocketWrapper InitializeSocket(int javaPort)
         {
             var socket = SocketFactory.CreateSocket();
-            socket.Connect(IPAddress.Loopback, javaPort);
+            socket.Connect(IPAddress.Loopback, javaPort, null);
             return socket;
         }
 
@@ -138,6 +136,10 @@ namespace Microsoft.Spark.CSharp
             //// initialize global state
             //shuffle.MemoryBytesSpilled = 0
             //shuffle.DiskBytesSpilled = 0
+            SerDe.ReadInt(inputStream);
+            SerDe.ReadInt(inputStream);
+            SerDe.ReadInt(inputStream);
+            SerDe.ReadLong(inputStream);
 
             // fetch name of workdir
             string sparkFilesDir = SerDe.ReadString(inputStream);
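
The four discarded reads keep the CLR worker in step with the Spark 2.3 worker protocol, which inserts extra fields after the version handshake. Their most likely meaning, going by the matching PySpark worker, is sketched below; the field names are an assumption, since this diff only skips them:

    int stageId        = SerDe.ReadInt(inputStream);  // TaskContext.stageId (assumed)
    int partitionId    = SerDe.ReadInt(inputStream);  // TaskContext.partitionId (assumed)
    int attemptNumber  = SerDe.ReadInt(inputStream);  // TaskContext.attemptNumber (assumed)
    long taskAttemptId = SerDe.ReadLong(inputStream); // TaskContext.taskAttemptId (assumed)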
@@ -255,13 +257,33 @@ namespace Microsoft.Spark.CSharp
             logger.LogDebug("Is func Sql UDF = {0}", isSqlUdf);
 
             IFormatter formatter = new BinaryFormatter();
+            UDFCommand command = null;
+
             if (isSqlUdf == 0)
+            {
+                command = ProcessNonUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf);
+            }
+            else
+            {
+                command = ProcessUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf);
+            }
+
+            if (command != null)
+            {
+                command.Execute();
+            }
+
+            return formatter;
+        }
+
+        private static UDFCommand ProcessNonUdfCommand(Stream inputStream, Stream outputStream, int splitIndex,
+            DateTime bootTime, IFormatter formatter, int isSqlUdf)
             {
                 logger.LogDebug("Processing non-UDF command");
                 int lengthOfCommandByteArray = SerDe.ReadInt(inputStream);
                 logger.LogDebug("Command length: " + lengthOfCommandByteArray);
+
+                UDFCommand command = null;
                 if (lengthOfCommandByteArray > 0)
                 {
                     var commandProcessWatch = new Stopwatch();
@@ -270,50 +292,57 @@ namespace Microsoft.Spark.CSharp
                     int stageId;
                     string deserializerMode;
                     string serializerMode;
-                    CSharpWorkerFunc workerFunc;
+                    CSharpWorkerFunc cSharpWorkerFunc;
                     ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
-                        out workerFunc);
+                        out cSharpWorkerFunc);
+
+                    command = new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode,
+                        serializerMode, formatter, commandProcessWatch, isSqlUdf,
+                        new List<WorkerFunc>() { new WorkerFunc(cSharpWorkerFunc, 0, null) }, stageId);
 
-                    ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, workerFunc, serializerMode,
-                        formatter, commandProcessWatch, stageId, isSqlUdf);
                 }
                 else
                 {
                     logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-(");
                 }
+
+                return command;
             }
-            else
+
+        private static UDFCommand ProcessUdfCommand(Stream inputStream, Stream outputStream, int splitIndex,
+            DateTime bootTime, IFormatter formatter, int isSqlUdf)
             {
                 logger.LogDebug("Processing UDF command");
                 var udfCount = SerDe.ReadInt(inputStream);
                 logger.LogDebug("Count of UDFs = {0}", udfCount);
 
-                if (udfCount == 1)
+                int stageId = -1;
+                string deserializerMode = null;
+                string serializerMode = null;
+                var commandProcessWatch = new Stopwatch();
+                List<WorkerFunc> workerFuncList = new List<WorkerFunc>();
+
+                for (int udfIter = 0; udfIter < udfCount; udfIter++)
                 {
                     CSharpWorkerFunc func = null;
                     var argCount = SerDe.ReadInt(inputStream);
                     logger.LogDebug("Count of args = {0}", argCount);
 
-                    var argOffsets = new List<int>();
+                    List<int> argOffsets = new List<int>();
 
                     for (int argIndex = 0; argIndex < argCount; argIndex++)
                     {
                         var offset = SerDe.ReadInt(inputStream);
                         logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset);
                         argOffsets.Add(offset);
                     }
 
                     var chainedFuncCount = SerDe.ReadInt(inputStream);
                     logger.LogDebug("Count of chained func = {0}", chainedFuncCount);
 
-                    var commandProcessWatch = new Stopwatch();
-                    int stageId = -1;
-                    string deserializerMode = null;
-                    string serializerMode = null;
                     for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++)
                     {
                         int lengthOfCommandByteArray = SerDe.ReadInt(inputStream);
-                        logger.LogDebug("UDF command length: " + lengthOfCommandByteArray)
-                        ;
+                        logger.LogDebug("UDF command length: " + lengthOfCommandByteArray);
 
                         if (lengthOfCommandByteArray > 0)
                         {
@@ -333,16 +362,12 @@ namespace Microsoft.Spark.CSharp
                             Debug.Assert(deserializerMode != null);
                             Debug.Assert(serializerMode != null);
                             Debug.Assert(func != null);
-                            ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, func, serializerMode, formatter,
-                                commandProcessWatch, stageId, isSqlUdf);
+                            workerFuncList.Add(new WorkerFunc(func, argCount, argOffsets));
                         }
-                        else
-                        {
-                            throw new NotSupportedException(); //TODO - add support for multiple UDFs
-                        }
                     }
 
-                return formatter;
+                return new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode,
+                    serializerMode, formatter, commandProcessWatch, isSqlUdf, workerFuncList, stageId);
             }
 
         private static void ReadCommand(Stream networkStream, IFormatter formatter, out int stageId,
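
With the NotSupportedException gone, one command can now carry several SQL UDFs, each recording which row columns feed it; ExecuteSqlUDF() evaluates all of them per input row and writes a single combined result. Illustrative only, for two UDFs f(col0) and g(col1, col2), where fFunc and gFunc stand for CSharpWorkerFunc instances deserialized from the command stream:

    var workerFuncList = new List<WorkerFunc>
    {
        new WorkerFunc(fFunc, 1, new List<int> { 0 }),    // f reads row[0]
        new WorkerFunc(gFunc, 2, new List<int> { 1, 2 })  // g reads row[1] and row[2]
    };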
@@ -389,115 +414,6 @@ namespace Microsoft.Spark.CSharp
             logger.LogDebug(sb.ToString());
         }
 
-        private static void ExecuteCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime,
-            string deserializerMode, CSharpWorkerFunc workerFunc, string serializerMode,
-            IFormatter formatter, Stopwatch commandProcessWatch, int stageId, int isSqlUdf)
-        {
-            int count = 0;
-            int nullMessageCount = 0;
-            logger.LogDebug("Beginning to execute func");
-            var func = workerFunc.Func;
-
-            var funcProcessWatch = Stopwatch.StartNew();
-            DateTime initTime = DateTime.UtcNow;
-            foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf)))
-            {
-                funcProcessWatch.Stop();
-
-                if (object.ReferenceEquals(null, message))
-                {
-                    nullMessageCount++;
-                    continue;
-                }
-
-                try
-                {
-                    WriteOutput(outputStream, serializerMode, message, formatter);
-                }
-                catch (Exception)
-                {
-                    logger.LogError("WriteOutput() failed at iteration {0}", count);
-                    throw;
-                }
-
-                count++;
-                funcProcessWatch.Start();
-            }
-
-            logger.LogInfo("Output entries count: " + count);
-            logger.LogDebug("Null messages count: " + nullMessageCount);
-
-            //if profiler:
-            //    profiler.profile(process)
-            //else:
-            //    process()
-
-            WriteDiagnosticsInfo(outputStream, bootTime, initTime);
-
-            commandProcessWatch.Stop();
-
-            // log statistics
-            logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds);
-            logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds);
-        }
-
-        private static void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter)
-        {
-            var buffer = GetSerializedMessage(serializerMode, message, formatter);
-            if (buffer == null)
-            {
-                logger.LogError("Buffer is null");
-            }
-
-            if (buffer.Length <= 0)
-            {
-                logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length);
-            }
-
-            //Debug.Assert(buffer != null);
-            //Debug.Assert(buffer.Length > 0);
-            SerDe.Write(networkStream, buffer.Length);
-            SerDe.Write(networkStream, buffer);
-        }
-
-        private static byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter)
-        {
-            byte[] buffer;
-
-            switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode))
-            {
-                case SerializedMode.None:
-                    buffer = message as byte[];
-                    break;
-
-                case SerializedMode.String:
-                    buffer = SerDe.ToBytes(message as string);
-                    break;
-
-                case SerializedMode.Row:
-                    var pickler = new Pickler();
-                    buffer = pickler.dumps(new ArrayList { message });
-                    break;
-
-                default:
-                    try
-                    {
-                        var ms = new MemoryStream();
-                        formatter.Serialize(ms, message);
-                        buffer = ms.ToArray();
-                    }
-                    catch (Exception)
-                    {
-                        logger.LogError("Exception serializing output");
-                        logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName);
-                        throw;
-                    }
-                    break;
-            }
-
-            return buffer;
-        }
-
         private static int ReadDiagnosticsInfo(Stream networkStream)
         {
             int rddId = SerDe.ReadInt(networkStream);
@@ -507,21 +423,6 @@ namespace Microsoft.Spark.CSharp
             return stageId;
         }
 
-        private static void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime)
-        {
-            DateTime finishTime = DateTime.UtcNow;
-            const string format = "MM/dd/yyyy hh:mm:ss.fff tt";
-            logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}",
-                bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format));
-            SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA);
-            SerDe.Write(networkStream, ToUnixTime(bootTime));
-            SerDe.Write(networkStream, ToUnixTime(initTime));
-            SerDe.Write(networkStream, ToUnixTime(finishTime));
-
-            SerDe.Write(networkStream, 0L); //shuffle.MemoryBytesSpilled
-            SerDe.Write(networkStream, 0L); //shuffle.DiskBytesSpilled
-        }
-
         private static void WriteAccumulatorValues(Stream networkStream, IFormatter formatter)
         {
             SerDe.Write(networkStream, Accumulator.threadLocalAccumulatorRegistry.Count);
@@ -566,120 +467,6 @@ namespace Microsoft.Spark.CSharp
             logger.LogDebug("Location: {0}{1}{2}", folder, Environment.NewLine, outfiles.ToString());
         }
 
-        private static long ToUnixTime(DateTime dt)
-        {
-            return (long)(dt - UnixTimeEpoch).TotalMilliseconds;
-        }
-
-        private static IEnumerable<dynamic> GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf)
-        {
-            logger.LogInfo("Serialized mode in GetIterator: " + serializedMode);
-            IFormatter formatter = new BinaryFormatter();
-            var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode);
-            int messageLength;
-            Stopwatch watch = Stopwatch.StartNew();
-            while ((messageLength = SerDe.ReadInt(inputStream)) != (int)SpecialLengths.END_OF_DATA_SECTION)
-            {
-                watch.Stop();
-                if (messageLength > 0 || messageLength == (int)SpecialLengths.NULL)
-                {
-                    watch.Start();
-                    byte[] buffer = messageLength > 0 ? SerDe.ReadBytes(inputStream, messageLength) : null;
-                    watch.Stop();
-                    switch (mode)
-                    {
-                        case SerializedMode.String:
-                        {
-                            if (messageLength > 0)
-                            {
-                                if (buffer == null)
-                                {
-                                    logger.LogDebug("Buffer is null. Message length is {0}", messageLength);
-                                }
-                                yield return SerDe.ToString(buffer);
-                            }
-                            else
-                            {
-                                yield return null;
-                            }
-                            break;
-                        }
-
-                        case SerializedMode.Row:
-                        {
-                            Debug.Assert(messageLength > 0);
-                            var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer);
-
-                            if (isFuncSqlUdf == 0)
-                            {
-                                foreach (var row in unpickledObjects.Select(item => (item as RowConstructor).GetRow()))
-                                {
-                                    yield return row;
-                                }
-                            }
-                            else
-                            {
-                                foreach (var row in unpickledObjects)
-                                {
-                                    yield return row;
-                                }
-                            }
-
-                            break;
-                        }
-
-                        case SerializedMode.Pair:
-                        {
-                            byte[] pairKey = buffer;
-                            byte[] pairValue;
-
-                            watch.Start();
-                            int valueLength = SerDe.ReadInt(inputStream);
-                            if (valueLength > 0)
-                            {
-                                pairValue = SerDe.ReadBytes(inputStream, valueLength);
-                            }
-                            else if (valueLength == (int)SpecialLengths.NULL)
-                            {
-                                pairValue = null;
-                            }
-                            else
-                            {
-                                throw new Exception(string.Format("unexpected valueLength: {0}", valueLength));
-                            }
-                            watch.Stop();
-
-                            yield return new Tuple<byte[], byte[]>(pairKey, pairValue);
-                            break;
-                        }
-
-                        case SerializedMode.None: //just return raw bytes
-                        {
-                            yield return buffer;
-                            break;
-                        }
-
-                        default:
-                        {
-                            if (buffer != null)
-                            {
-                                var ms = new MemoryStream(buffer);
-                                yield return formatter.Deserialize(ms);
-                            }
-                            else
-                            {
-                                yield return null;
-                            }
-                            break;
-                        }
-                    }
-                }
-                watch.Start();
-            }
-
-            logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds);
-        }
-
         internal class SparkCLRAssemblyHandler
         {
             private readonly ConcurrentDictionary<string, Assembly> assemblyDict = new ConcurrentDictionary<string, Assembly>();
@@ -46,6 +46,8 @@
     <Reference Include="Microsoft.CSharp" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="WorkerFunc.cs" />
+    <Compile Include="UDFCommand.cs" />
     <Compile Include="MultiThreadWorker.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="TaskRunner.cs" />
@@ -0,0 +1,25 @@
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System.Runtime.Serialization;
+using Microsoft.Spark.CSharp.Core;
+using System.Collections.Generic;
+
+namespace Microsoft.Spark.CSharp
+{
+    internal class WorkerFunc
+    {
+        internal CSharpWorkerFunc CharpWorkerFunc { get; }
+
+        internal int ArgsCount { get; }
+
+        internal List<int> ArgOffsets { get; }
+
+        internal WorkerFunc(CSharpWorkerFunc func, int argsCount, List<int> argOffsets)
+        {
+            CharpWorkerFunc = func;
+            ArgsCount = argsCount;
+            ArgOffsets = argOffsets;
+        }
+    }
+}
@@ -81,6 +81,7 @@ namespace WorkerTest
             worker.Start();
             int serverPort = 0;
             serverPort = SerDe.ReadInt(worker.StandardOutput.BaseStream);
+            Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", serverPort.ToString());
 
             StreamReader stdoutReader = worker.StandardOutput;
             Task.Run(() => {
@@ -119,7 +120,7 @@ namespace WorkerTest
         private ISocketWrapper CreateSocket(int serverPort)
        {
             var socket = SocketFactory.CreateSocket();
-            socket.Connect(IPAddress.Loopback, serverPort);
+            socket.Connect(IPAddress.Loopback, serverPort, null);
             return socket;
         }
 
@ -131,6 +132,10 @@ namespace WorkerTest
|
||||||
{
|
{
|
||||||
SerDe.Write(s, splitIndex);
|
SerDe.Write(s, splitIndex);
|
||||||
SerDe.Write(s, ver);
|
SerDe.Write(s, ver);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0L);
|
||||||
SerDe.Write(s, sparkFilesDir);
|
SerDe.Write(s, sparkFilesDir);
|
||||||
SerDe.Write(s, numberOfIncludesItems);
|
SerDe.Write(s, numberOfIncludesItems);
|
||||||
SerDe.Write(s, numBroadcastVariables);
|
SerDe.Write(s, numBroadcastVariables);
|
||||||
|
|
|
@ -93,6 +93,7 @@ namespace WorkerTest
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", port.ToString());
|
||||||
lock (syncLock)
|
lock (syncLock)
|
||||||
{
|
{
|
||||||
output.Clear();
|
output.Clear();
|
||||||
|
@ -125,6 +126,10 @@ namespace WorkerTest
|
||||||
{
|
{
|
||||||
SerDe.Write(s, splitIndex);
|
SerDe.Write(s, splitIndex);
|
||||||
SerDe.Write(s, ver);
|
SerDe.Write(s, ver);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0L);
|
||||||
SerDe.Write(s, sparkFilesDir);
|
SerDe.Write(s, sparkFilesDir);
|
||||||
SerDe.Write(s, numberOfIncludesItems);
|
SerDe.Write(s, numberOfIncludesItems);
|
||||||
SerDe.Write(s, numBroadcastVariables);
|
SerDe.Write(s, numBroadcastVariables);
|
||||||
|
@ -631,6 +636,10 @@ namespace WorkerTest
|
||||||
{
|
{
|
||||||
SerDe.Write(s, splitIndex);
|
SerDe.Write(s, splitIndex);
|
||||||
SerDe.Write(s, ver);
|
SerDe.Write(s, ver);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0);
|
||||||
|
SerDe.Write(s, 0L);
|
||||||
SerDe.Write(s, sparkFilesDir);
|
SerDe.Write(s, sparkFilesDir);
|
||||||
SerDe.Write(s, numberOfIncludesItems);
|
SerDe.Write(s, numberOfIncludesItems);
|
||||||
|
|
||||||
|
@ -802,6 +811,10 @@ namespace WorkerTest
|
||||||
using (var inputStream = new MemoryStream(500))
|
using (var inputStream = new MemoryStream(500))
|
||||||
{
|
{
|
||||||
SerDe.Write(inputStream, "1.0"); //version
|
SerDe.Write(inputStream, "1.0"); //version
|
||||||
|
SerDe.Write(inputStream, 0);
|
||||||
|
SerDe.Write(inputStream, 0);
|
||||||
|
SerDe.Write(inputStream, 0);
|
||||||
|
SerDe.Write(inputStream, 0L);
|
||||||
SerDe.Write(inputStream, ""); //includes directory
|
SerDe.Write(inputStream, ""); //includes directory
|
||||||
SerDe.Write(inputStream, 0); //number of included items
|
SerDe.Write(inputStream, 0); //number of included items
|
||||||
SerDe.Write(inputStream, 0); //number of broadcast variables
|
SerDe.Write(inputStream, 0); //number of broadcast variables
|
||||||
|
|
|
@ -35,9 +35,8 @@
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
<Reference Include="Newtonsoft.Json">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||||
<HintPath>..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Razorvine.Pyrolite, Version=4.10.0.26455, Culture=neutral, processorArchitecture=MSIL">
|
<Reference Include="Razorvine.Pyrolite, Version=4.10.0.26455, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
|
|
@ -32,17 +32,17 @@
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Reference Include="CSharpWorker">
|
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
|
|
@ -35,17 +35,17 @@
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Reference Include="CSharpWorker">
|
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
# Visual Studio 14
|
# Visual Studio 14
|
||||||
VisualStudioVersion = 14.0.25123.0
|
VisualStudioVersion = 14.0.25420.1
|
||||||
MinimumVisualStudioVersion = 10.0.40219.1
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HdfsWordCount", "Streaming\HdfsWordCount\HdfsWordCount.csproj", "{6A2C7CF9-D64E-490D-9841-269EE14F7932}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HdfsWordCount", "Streaming\HdfsWordCount\HdfsWordCount.csproj", "{6A2C7CF9-D64E-490D-9841-269EE14F7932}"
|
||||||
EndProject
|
EndProject
|
||||||
|
|
|
@ -34,14 +34,17 @@
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Reference Include="CSharpWorker">
|
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="log4net">
|
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
|
|
@ -33,17 +33,17 @@
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||||
|
</Reference>
|
||||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="CSharpWorker">
|
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<Private>True</Private>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||||
</Reference>
|
|
||||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
|
||||||
<Private>True</Private>
|
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="System" />
|
<Reference Include="System" />
|
||||||
<Reference Include="System.Core" />
|
<Reference Include="System.Core" />
|
||||||
|
|
|
@ -34,17 +34,17 @@
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Reference Include="CSharpWorker">
|
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||||
</Reference>
|
</Reference>
|
||||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||||
<SpecificVersion>False</SpecificVersion>
|
<SpecificVersion>False</SpecificVersion>
|
||||||
|
|
|
@@ -34,17 +34,17 @@
 <WarningLevel>4</WarningLevel>
 </PropertyGroup>
 <ItemGroup>
-<Reference Include="CSharpWorker">
+<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
 <SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
 </Reference>
 <Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
 <SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
 </Reference>
-<Reference Include="Microsoft.Spark.CSharp.Adapter">
+<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
 <SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
 </Reference>
 <Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
 <SpecificVersion>False</SpecificVersion>

@@ -34,16 +34,18 @@
 <WarningLevel>4</WarningLevel>
 </PropertyGroup>
 <ItemGroup>
-<Reference Include="CSharpWorker">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
+<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
 </Reference>
 <Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
 <SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
 </Reference>
 <Reference Include="Microsoft.CSharp" />
-<Reference Include="Microsoft.Spark.CSharp.Adapter">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
+<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
 </Reference>
 <Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
 <SpecificVersion>False</SpecificVersion>

@@ -32,22 +32,22 @@
 <WarningLevel>4</WarningLevel>
 </PropertyGroup>
 <ItemGroup>
+<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
+</Reference>
+<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
+</Reference>
+<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
+</Reference>
 <Reference Include="Newtonsoft.Json">
 <SpecificVersion>False</SpecificVersion>
 <HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
 </Reference>
-<Reference Include="CSharpWorker">
-<SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
-</Reference>
-<Reference Include="log4net">
-<SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
-</Reference>
-<Reference Include="Microsoft.Spark.CSharp.Adapter">
-<SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
-</Reference>
 <Reference Include="Razorvine.Pyrolite">
 <SpecificVersion>False</SpecificVersion>
 <HintPath>..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>

@@ -32,15 +32,17 @@
 <WarningLevel>4</WarningLevel>
 </PropertyGroup>
 <ItemGroup>
-<Reference Include="CSharpWorker">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
-</Reference>
-<Reference Include="log4net, Version=1.2.10.0, Culture=neutral, PublicKeyToken=1b44e1d426115821, processorArchitecture=MSIL">
-<SpecificVersion>False</SpecificVersion>
-<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
-</Reference>
-<Reference Include="Microsoft.Spark.CSharp.Adapter">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
+<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
+</Reference>
+<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
+</Reference>
+<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
+<SpecificVersion>False</SpecificVersion>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
 </Reference>
 <Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
 <SpecificVersion>False</SpecificVersion>

@@ -66,13 +66,13 @@
 </ItemGroup>
 <ItemGroup>
 <Reference Include="CSharpWorker">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
 </Reference>
 <Reference Include="log4net">
-<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
 </Reference>
 <Reference Include="Microsoft.Spark.CSharp.Adapter">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
 </Reference>
 <Reference Include="mscorlib" />
 <Reference Include="FSharp.Core, Version=$(TargetFSharpCoreVersion), Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">

@@ -71,20 +71,17 @@
 </ItemGroup>
 <ItemGroup>
 <Reference Include="CSharpWorker">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
-<Private>True</Private>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
 </Reference>
 <Reference Include="FSharp.Core">
 <HintPath>..\..\packages\FSharp.Core.4.0.0.1\lib\net40\FSharp.Core.dll</HintPath>
 <Private>True</Private>
 </Reference>
 <Reference Include="log4net">
-<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
-<Private>True</Private>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
 </Reference>
 <Reference Include="Microsoft.Spark.CSharp.Adapter">
-<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
-<Private>True</Private>
+<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
 </Reference>
 <Reference Include="mscorlib" />
 <Reference Include="Newtonsoft.Json">

@@ -145,7 +145,7 @@ The following sample commands show how to run Mobius examples in local mode. Usi
 Computes the _approximate_ value of Pi using two approaches and displays the value.

 ### WordCount Example (Batch)
-* Run `sparkclr-submit.cmd --exe SparkClrWordCount.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug <InputFilePath>`
+* Run `sparkclr-submit.cmd --exe SparkClrPi.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug <InputFilePath>`

 `InputFilePath` should be in one of the following formats:
 * `hdfs://path/to/inputfile`

@@ -2,7 +2,7 @@
 <modelVersion>4.0.0</modelVersion>
 <groupId>com.microsoft.sparkclr</groupId>
 <artifactId>spark-clr_2.11</artifactId>
-<version>2.0.200-SNAPSHOT</version>
+<version>2.3.1-SNAPSHOT</version>
 <name>Mobius Project</name>
 <description>C# language binding and extensions to Apache Spark</description>
 <url>https://github.com/Microsoft/Mobius</url>

@@ -35,7 +35,7 @@
 <maven.compiler.target>1.5</maven.compiler.target>
 <encoding>UTF-8</encoding>
 <scala.version>2.11.8</scala.version>
-<spark.version>2.0.2</spark.version>
+<spark.version>2.3.1</spark.version>
 <scala.binary.version>2.11</scala.binary.version>
 </properties>

@@ -106,14 +106,19 @@
 <dependency>
 <groupId>org.apache.spark</groupId>
 <artifactId>spark-hive_2.11</artifactId>
-<version>2.0.0</version>
+<version>${spark.version}</version>
 <!--the following is placeholder for building uber package. Please keep as-is-->
 <!--<scope>provided</scope>-->
 </dependency>
 <dependency>
 <groupId>com.databricks</groupId>
-<artifactId>spark-csv_2.10</artifactId>
-<version>1.4.0</version>
+<artifactId>spark-csv_2.11</artifactId>
+<version>1.5.0</version>
+</dependency>
+<dependency>
+<groupId>com.databricks</groupId>
+<artifactId>spark-avro_2.11</artifactId>
+<version>4.0.0</version>
 </dependency>
 </dependencies>

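Note: the hunk above pins spark-hive to ${spark.version}, moves spark-csv to its Scala 2.11 build, and adds com.databricks:spark-avro_2.11:4.0.0, backing the "specify avro or parquet syntax" item in the commit message. For reference, a minimal sketch of how the spark-avro package is typically consumed from Scala; the SparkSession setup and paths are illustrative, not project code:

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("avro-sketch").getOrCreate()
    // spark-avro 4.x registers the "com.databricks.spark.avro" data source
    val df = spark.read.format("com.databricks.spark.avro").load("/tmp/events.avro")
    df.write.format("com.databricks.spark.avro").save("/tmp/events-copy")
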
@@ -12,6 +12,7 @@ import java.util.{List => JList, Map => JMap}

 import org.apache.hadoop.io.compress.CompressionCodec
 import org.apache.spark.api.python._
+import org.apache.spark.api.python.PythonAccumulatorV2
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.rdd.RDD
 import org.apache.spark._

@@ -34,7 +35,7 @@ class CSharpRDD(
 cSharpWorkerExecutable: String,
 unUsedVersionIdentifier: String,
 broadcastVars: JList[Broadcast[PythonBroadcast]],
-accumulator: Accumulator[JList[Array[Byte]]])
+accumulator: PythonAccumulatorV2)
 extends PythonRDD (
 parent,
 SQLUtils.createCSharpFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable,

@@ -95,7 +96,7 @@ class CSharpRDD(
 logInfo("Env vars: " + envVars.asScala.mkString(", "))

 val runner = new PythonRunner(
-Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuse_worker, false, Array(Array(0)))
+Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuseWorker)
 runner.compute(firstParent.iterator(split, context), split.index, context)
 }

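Note: Spark 2.3 refactored the Python runner machinery (BasePythonRunner), so the non-UDF PythonRunner that CSharpRDD reuses now takes only the chained functions, the buffer size, and the worker-reuse flag; the eval type and argument offsets that had to be passed explicitly against Spark 2.0 are supplied internally. The resulting call shape, condensed from the hunk above (names come from CSharpRDD.compute):

    // func: the PythonFunction wrapping the serialized C# lambda
    val runner = new PythonRunner(
      Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuseWorker)
    runner.compute(firstParent.iterator(split, context), split.index, context)
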
@@ -8,6 +8,7 @@ package org.apache.spark.sql.api.csharp
 import java.io.{ByteArrayOutputStream, DataOutputStream}

 import org.apache.spark.{Accumulator, SparkContext}
+import org.apache.spark.api.python.PythonAccumulatorV2
 import org.apache.spark.api.csharp.SerDe
 import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
 import org.apache.spark.api.python.{PythonBroadcast, PythonFunction, SerDeUtil}

@@ -51,7 +52,7 @@ object SQLUtils {
 cSharpWorkerExecutable: String,
 unUsedVersionIdentifier: String,
 broadcastVars: JList[Broadcast[PythonBroadcast]],
-accumulator: Accumulator[JList[Array[Byte]]]) : PythonFunction = {
+accumulator: PythonAccumulatorV2) : PythonFunction = {
 PythonFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable,
 unUsedVersionIdentifier, broadcastVars, accumulator)
 }

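Note: Spark 2.x dropped the legacy Accumulator API, so both signatures here swap Accumulator[JList[Array[Byte]]] for PythonAccumulatorV2, which in Spark 2.3 is a CollectionAccumulator[Array[Byte]] that forwards worker-side updates to the driver. For intuition, the AccumulatorV2-family API it builds on works like this (a generic sketch, not the Mobius wiring; names are illustrative):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("acc-sketch").getOrCreate()
    val sc = spark.sparkContext

    // CollectionAccumulator is the AccumulatorV2 flavor that PythonAccumulatorV2 extends
    val acc = sc.collectionAccumulator[Array[Byte]]("worker-bytes")
    sc.parallelize(1 to 3).foreach(i => acc.add(Array(i.toByte)))
    println(acc.value.size) // 3 once the job has run
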
@@ -127,17 +127,17 @@ object Utils extends Logging {
 timer.schedule(new TimerTask() {
 @Override
 def run() {
-Runtime.getRuntime.halt(status)
+if (status!=0) { Runtime.getRuntime.halt(status); }
 }
 }, maxDelayMillis)
 // try to exit nicely
-System.exit(status);
+if (status!=0) { System.exit(status); }
 } catch {
 // exit nastily if we have a problem
 case ex: Throwable => Runtime.getRuntime.halt(status)
 } finally {
 // should never get here
-Runtime.getRuntime.halt(status)
+if (status!=0) { Runtime.getRuntime.halt(status); }
 }
 }

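Note: this hunk is the "Fixed java process terminate issue" from the commit message. exit() used to arm an unconditional Runtime.getRuntime.halt and call System.exit even for status 0, so a successful driver could be killed before JVM shutdown hooks finished; every halt/exit is now guarded on a non-zero status. The watchdog pattern the method implements, condensed from the hunk (a sketch, not the full source):

    import java.util.{Timer, TimerTask}

    def exit(status: Int, maxDelayMillis: Long): Unit = {
      try {
        // on failure, arm a hard halt in case System.exit blocks in shutdown hooks
        new Timer().schedule(new TimerTask {
          override def run(): Unit = if (status != 0) Runtime.getRuntime.halt(status)
        }, maxDelayMillis)
        // try to exit nicely; status 0 now falls through and lets the JVM wind down normally
        if (status != 0) System.exit(status)
      } catch {
        case _: Throwable => Runtime.getRuntime.halt(status)
      } finally {
        if (status != 0) Runtime.getRuntime.halt(status)
      }
    }
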
@@ -147,7 +147,7 @@ object Utils extends Logging {
 * @param status the exit status, zero for OK, non-zero for error
 */
 def exit(status: Int): Unit = {
-exit(status, 1000)
+exit(status, 1000);
 }

 private[spark] def listZipFileEntries(file: File): Array[String] = {

@@ -42,7 +42,7 @@ if not exist "%SPARK_JARS_DIR%" (

 set SPARK_JARS_CLASSPATH=%SPARK_JARS_DIR%\*

-if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar)
+if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar)
 echo [sparkclr-submit.cmd] SPARKCLR_JAR=%SPARKCLR_JAR%
 set SPARKCLR_CLASSPATH=%SPARKCLR_HOME%\lib\%SPARKCLR_JAR%
 REM SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode

@@ -105,4 +105,4 @@ goto :eof
 @echo Example 2:
 @echo sparkclr-submit.cmd [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe c:\sparkclrapp\driver.zip arg1 arg2 arg3
 @echo Example 3:
-@echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3
+@echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3

@@ -32,7 +32,7 @@ function usage() {
 echo "Example 2:"
 echo "sparkclr-submit.sh [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe sparkclrapp/driver.zip arg1 arg2 arg3"
 echo "Example 3:"
-echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.10-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3"
+echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3"
 }

 [ "$SPARK_HOME" = "" ] && spark_home_error

@@ -57,7 +57,7 @@ fi

 export SPARK_JARS_CLASSPATH="$SPARK_JARS_DIR/*"

-export SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar
+export SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar
 export SPARKCLR_CLASSPATH="$SPARKCLR_HOME/lib/$SPARKCLR_JAR"
 # SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode
 [ ! "$SPARKCLR_DEBUGMODE_EXT_JARS" = "" ] && export SPARKCLR_CLASSPATH="$SPARKCLR_CLASSPATH:$SPARKCLR_DEBUGMODE_EXT_JARS"