Dev/sutyag/upgrade mobius (#697)
* basic * Add extractor and outputter * Add reducer not done * Add procedure * kill node, broadcast, upload executable error feed to cosmos, specify avro or parquet syntax * Add more functions to HDFS. Add submitter heartbeat Update doc * Redesign cosmos download, add replication setting for hdfs * Improve executable runner to deal with bad lines * MERGE MOBIUS * change dependency path * Add registration method to mobius * Major refactoring to add ISparkosmosModule to modularize everything Start supporting streaming Fixed a couple of Mobius bugs Added integration tests Reenabled unit tests Added DatedPath * Make sparkcontext settable, fix setjobgroup * Expose more interface from Mobius * Mobius change for Spark 2.3 * fix version conflict, remove unused files * Added support for multiple UDFs * Fixed non sql udf issue * 1. Upgrade mobius to spark 2.3.1 2. Fixed UDF bugs 3. Added support for multiple UDFs * 1. Added sample testcases 2. Updated reference for examples * Removed stashed files * Fixed review comments * Fixed review comments * Fixed failed unit test cases * Deleting all the things * Updated version in appveyor * Updated tartool download path * Fixed java process terminate issue * Revert access modifier to internal from public for JvmBridge
This commit is contained in:
Родитель
09462fff7e
Коммит
9aa97b98c6
|
@ -157,4 +157,4 @@ Mobius is licensed under the MIT license. See [LICENSE](LICENSE) file for full l
|
|||
* tweet [@MobiusForSpark](http://twitter.com/MobiusForSpark)
|
||||
|
||||
## Code of Conduct
|
||||
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
|
||||
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
|
|
@ -1,4 +1,4 @@
|
|||
version: 2.0.2-SNAPSHOT.{build}
|
||||
version: 2.3.1-SNAPSHOT.{build}
|
||||
|
||||
environment:
|
||||
securefile:
|
||||
|
|
|
@ -6,6 +6,8 @@ rem Copyright (c) Microsoft. All rights reserved.
|
|||
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
rem
|
||||
|
||||
SET MAVEN_OPTS=-Dhttps.protocols=TLSv1,TLSv1.1,TLSv1.2
|
||||
|
||||
if "%1" == "csharp" set buildCSharp=true
|
||||
|
||||
SET CMDHOME=%~dp0
|
||||
|
|
|
@ -47,7 +47,7 @@ if "%precheck%" == "bad" (goto :EOF)
|
|||
@rem
|
||||
@rem setup Hadoop and Spark versions
|
||||
@rem
|
||||
set SPARK_VERSION=2.0.2
|
||||
set SPARK_VERSION=2.3.1
|
||||
set HADOOP_VERSION=2.6
|
||||
set APACHE_DIST_SERVER=archive.apache.org
|
||||
@echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION%, APACHE_DIST_SERVER=%APACHE_DIST_SERVER%
|
||||
|
@ -100,7 +100,7 @@ if "!USER_EXE!"=="" (
|
|||
call sparkclr-submit.cmd --conf spark.sql.warehouse.dir=%TEMP_DIR% %*
|
||||
)
|
||||
|
||||
@if ERRORLEVEL 1 GOTO :ErrorStop
|
||||
@if ERRORLEVEL 2 GOTO :ErrorStop
|
||||
|
||||
@GOTO :EOF
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ if ($stage.ToLower() -eq "run")
|
|||
$hadoopVersion = if ($envValue -eq $null) { "2.6" } else { $envValue }
|
||||
|
||||
$envValue = [Environment]::GetEnvironmentVariable("SPARK_VERSION")
|
||||
$sparkVersion = if ($envValue -eq $null) { "2.0.2" } else { $envValue }
|
||||
$sparkVersion = if ($envValue -eq $null) { "2.3.1" } else { $envValue }
|
||||
|
||||
Write-Output "[downloadtools] hadoopVersion=$hadoopVersion, sparkVersion=$sparkVersion, apacheDistServer=$apacheDistServer"
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ do
|
|||
done
|
||||
|
||||
# setup Hadoop and Spark versions
|
||||
export SPARK_VERSION=2.0.2
|
||||
export SPARK_VERSION=2.3.1
|
||||
export HADOOP_VERSION=2.6
|
||||
export APACHE_DIST_SERVER=archive.apache.org
|
||||
echo "[run-samples.sh] SPARK_VERSION=$SPARK_VERSION, HADOOP_VERSION=$HADOOP_VERSION, APACHE_DIST_SERVER=$APACHE_DIST_SERVER"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
|
@ -20,13 +20,13 @@
|
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
|
|
|
@ -35,16 +35,17 @@
|
|||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<DocumentationFile>..\documentation\Microsoft.Spark.CSharp.Adapter.Doc.XML</DocumentationFile>
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<StartupObject />
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="log4net">
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json">
|
||||
<HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
<Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Pyrolite">
|
||||
<HintPath>..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
|
||||
|
@ -98,6 +99,7 @@
|
|||
<Compile Include="Network\RioNative.cs" />
|
||||
<Compile Include="Network\RioSocketWrapper.cs" />
|
||||
<Compile Include="Network\SaeaSocketWrapper.cs" />
|
||||
<Compile Include="Network\SocketInfo.cs" />
|
||||
<Compile Include="Network\SocketStream.cs" />
|
||||
<Compile Include="Network\SockDataToken.cs" />
|
||||
<Compile Include="Network\SocketFactory.cs" />
|
||||
|
@ -184,6 +186,7 @@
|
|||
<ItemGroup>
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup />
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<PropertyGroup>
|
||||
<PostBuildEvent>
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Core
|
||||
{
|
||||
|
@ -11,6 +12,6 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
/// </summary>
|
||||
interface IRDDCollector
|
||||
{
|
||||
IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type);
|
||||
IEnumerable<dynamic> Collect(SocketInfo info, SerializedMode serializedMode, Type type);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
using Microsoft.Spark.CSharp.Proxy;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
|
||||
|
@ -60,6 +61,7 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
{
|
||||
return sparkContext;
|
||||
}
|
||||
set { sparkContext = value; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -592,13 +594,13 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
/// <returns></returns>
|
||||
public T[] Collect()
|
||||
{
|
||||
int port = RddProxy.CollectAndServe();
|
||||
return Collect(port).Cast<T>().ToArray();
|
||||
var info = RddProxy.CollectAndServe();
|
||||
return Collect(info).Cast<T>().ToArray();
|
||||
}
|
||||
|
||||
internal IEnumerable<dynamic> Collect(int port)
|
||||
internal IEnumerable<dynamic> Collect(SocketInfo info)
|
||||
{
|
||||
return RddProxy.RDDCollector.Collect(port, serializedMode, typeof(T));
|
||||
return RddProxy.RDDCollector.Collect(info, serializedMode, typeof(T));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -830,9 +832,9 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
|
||||
|
||||
var mappedRDD = MapPartitionsWithIndex<T>(new TakeHelper<T>(left).Execute);
|
||||
int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions);
|
||||
var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions);
|
||||
|
||||
IEnumerable<T> res = Collect(port).Cast<T>();
|
||||
IEnumerable<T> res = Collect(info).Cast<T>();
|
||||
|
||||
items.AddRange(res);
|
||||
partsScanned += numPartsToTry;
|
||||
|
@ -925,7 +927,7 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
/// <returns></returns>
|
||||
public RDD<T> Repartition(int numPartitions)
|
||||
{
|
||||
return new RDD<T>(RddProxy.Repartition(numPartitions), sparkContext);
|
||||
return new RDD<T>(RddProxy.Repartition(numPartitions), sparkContext, serializedMode);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -942,8 +944,8 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
/// <returns></returns>
|
||||
public RDD<T> Coalesce(int numPartitions, bool shuffle = false)
|
||||
{
|
||||
return new RDD<T>(RddProxy.Coalesce(numPartitions, shuffle), sparkContext);
|
||||
}
|
||||
return new RDD<T>(RddProxy.Coalesce(numPartitions, shuffle), sparkContext, serializedMode);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Zips this RDD with another one, returning key-value pairs with the
|
||||
|
@ -1065,8 +1067,8 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
foreach (int partition in Enumerable.Range(0, GetNumPartitions()))
|
||||
{
|
||||
var mappedRDD = MapPartitionsWithIndex<T>((pid, iter) => iter);
|
||||
int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1));
|
||||
foreach (T row in Collect(port))
|
||||
var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1));
|
||||
foreach (T row in Collect(info))
|
||||
yield return row;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ using System.Runtime.Serialization.Formatters.Binary;
|
|||
using System.Text;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
using Microsoft.Spark.CSharp.Sql;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Core
|
||||
|
@ -20,14 +21,31 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
/// </summary>
|
||||
class RDDCollector : IRDDCollector
|
||||
{
|
||||
public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
|
||||
private static ILoggerService logger;
|
||||
// Resolves the RDDCollector logger on first access and returns the cached instance afterwards.
private static ILoggerService Logger
{
    get
    {
        return logger ?? (logger = LoggerServiceFactory.GetLogger(typeof(RDDCollector)));
    }
}
|
||||
|
||||
public IEnumerable<dynamic> Collect(SocketInfo info, SerializedMode serializedMode, Type type)
|
||||
{
|
||||
IFormatter formatter = new BinaryFormatter();
|
||||
var sock = SocketFactory.CreateSocket();
|
||||
sock.Connect(IPAddress.Loopback, port);
|
||||
sock.Connect(IPAddress.Loopback, info.Port, null);
|
||||
|
||||
using (var s = sock.GetStream())
|
||||
{
|
||||
if (info.Secret != null)
|
||||
{
|
||||
SerDe.Write(s, info.Secret);
|
||||
var reply = SerDe.ReadString(s);
|
||||
Logger.LogDebug("Connect back to JVM: " + reply);
|
||||
}
|
||||
byte[] buffer;
|
||||
while ((buffer = SerDe.ReadBytes(s)) != null && buffer.Length > 0)
|
||||
{
|
||||
|
|
|
@ -36,7 +36,7 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
|
|||
if (!sockets.TryDequeue(out socket))
|
||||
{
|
||||
socket = SocketFactory.CreateSocket();
|
||||
socket.Connect(IPAddress.Loopback, portNumber);
|
||||
socket.Connect(IPAddress.Loopback, portNumber, null);
|
||||
}
|
||||
return socket;
|
||||
}
|
||||
|
|
|
@ -12,12 +12,12 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
|
|||
/// Reference to object created in JVM
|
||||
/// </summary>
|
||||
[Serializable]
|
||||
internal class JvmObjectReference
|
||||
public class JvmObjectReference
|
||||
{
|
||||
public string Id { get; private set; }
|
||||
private DateTime creationTime;
|
||||
|
||||
public JvmObjectReference(string jvmReferenceId)
|
||||
internal JvmObjectReference(string jvmReferenceId)
|
||||
{
|
||||
Id = jvmReferenceId;
|
||||
creationTime = DateTime.UtcNow;
|
||||
|
@ -48,6 +48,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
|
|||
return base.GetHashCode();
|
||||
}
|
||||
|
||||
/// <summary>
/// Calls the "toString" method of the referenced JVM object through the JVM bridge
/// and returns the string form of the value the bridge hands back.
/// </summary>
/// <returns>The result of the remote "toString" call, converted to a string</returns>
public string ObjectToString()
{
    var javaResult = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "toString");
    return javaResult.ToString();
}
|
||||
|
||||
public string GetDebugInfo()
|
||||
{
|
||||
var javaObjectReferenceForClassObject = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "getClass").ToString());
|
||||
|
|
|
@ -31,7 +31,9 @@ namespace Microsoft.Spark.CSharp.Interop
|
|||
}
|
||||
}
|
||||
|
||||
internal static IConfigurationService configurationService;
|
||||
internal static IJvmBridge JvmBridge => SparkCLRIpcProxy.JvmBridge;
|
||||
|
||||
internal static IConfigurationService configurationService;
|
||||
|
||||
internal static IConfigurationService ConfigurationService
|
||||
{
|
||||
|
|
|
@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network
|
|||
/// ByteBuf delimits a section of a ByteBufChunk.
|
||||
/// It is the smallest unit to be allocated.
|
||||
/// </summary>
|
||||
internal class ByteBuf
|
||||
public class ByteBuf
|
||||
{
|
||||
private int readerIndex;
|
||||
private int writerIndex;
|
||||
|
|
|
@ -2,182 +2,203 @@
|
|||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using Microsoft.Spark.CSharp.Configuration;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Network
|
||||
{
|
||||
/// <summary>
|
||||
/// A simple wrapper of System.Net.Sockets.Socket class.
|
||||
/// </summary>
|
||||
internal class DefaultSocketWrapper : ISocketWrapper
|
||||
{
|
||||
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DefaultSocketWrapper));
|
||||
private readonly Socket innerSocket;
|
||||
/// <summary>
|
||||
/// A simple wrapper of System.Net.Sockets.Socket class.
|
||||
/// </summary>
|
||||
internal class DefaultSocketWrapper : ISocketWrapper
|
||||
{
|
||||
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DefaultSocketWrapper));
|
||||
private readonly Socket innerSocket;
|
||||
|
||||
/// <summary>
|
||||
/// Default constructor that creates a new instance of DefaultSocket class which represents
|
||||
/// a traditional socket (System.Net.Socket.Socket).
|
||||
///
|
||||
/// This socket is bound to Loopback with port 0.
|
||||
/// </summary>
|
||||
public DefaultSocketWrapper()
|
||||
{
|
||||
innerSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
|
||||
var localEndPoint = new IPEndPoint(IPAddress.Loopback, 0);
|
||||
innerSocket.Bind(localEndPoint);
|
||||
}
|
||||
/// <summary>
|
||||
/// Default constructor that creates a new instance of DefaultSocket class which represents
|
||||
/// a traditional socket (System.Net.Socket.Socket).
|
||||
///
|
||||
/// This socket is bound to Loopback with port 0.
|
||||
/// </summary>
|
||||
public DefaultSocketWrapper()
|
||||
{
|
||||
innerSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
|
||||
var localEndPoint = new IPEndPoint(IPAddress.Loopback, 0);
|
||||
innerSocket.Bind(localEndPoint);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a instance of DefaultSocket class using the specified System.Net.Socket.Socket object.
|
||||
/// </summary>
|
||||
/// <param name="socket">The existing socket</param>
|
||||
private DefaultSocketWrapper(Socket socket)
|
||||
{
|
||||
innerSocket = socket;
|
||||
}
|
||||
/// <summary>
|
||||
/// Initializes an instance of the DefaultSocketWrapper class using the specified System.Net.Sockets.Socket object.
|
||||
/// </summary>
|
||||
/// <param name="socket">The existing socket</param>
|
||||
private DefaultSocketWrapper(Socket socket)
|
||||
{
|
||||
innerSocket = socket;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Accepts a incoming connection request.
|
||||
/// </summary>
|
||||
/// <returns>A DefaultSocket instance used to send and receive data</returns>
|
||||
public ISocketWrapper Accept()
|
||||
{
|
||||
var socket = innerSocket.Accept();
|
||||
return new DefaultSocketWrapper(socket);
|
||||
}
|
||||
/// <summary>
|
||||
/// Accepts an incoming connection request.
|
||||
/// </summary>
|
||||
/// <returns>A DefaultSocket instance used to send and receive data</returns>
|
||||
public ISocketWrapper Accept()
|
||||
{
|
||||
var socket = innerSocket.Accept();
|
||||
return new DefaultSocketWrapper(socket);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Close the socket connections and releases all associated resources.
|
||||
/// </summary>
|
||||
public void Close()
|
||||
{
|
||||
innerSocket.Close();
|
||||
}
|
||||
/// <summary>
|
||||
/// Close the socket connections and releases all associated resources.
|
||||
/// </summary>
|
||||
public void Close()
|
||||
{
|
||||
innerSocket.Close();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
/// </summary>
|
||||
/// <param name="remoteaddr">The IP address of the remote host</param>
|
||||
/// <param name="port">The port number of the remote host</param>
|
||||
public void Connect(IPAddress remoteaddr, int port)
|
||||
{
|
||||
var remoteEndPoint = new IPEndPoint(remoteaddr, port);
|
||||
innerSocket.Connect(remoteEndPoint);
|
||||
}
|
||||
/// <summary>
|
||||
/// Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
/// </summary>
|
||||
/// <param name="remoteaddr">The IP address of the remote host</param>
|
||||
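/// <param name="port">The port number of the remote host</param>
/// <param name="secret">The secret to connect, can be null; this implementation does not read it</param>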
|
||||
public void Connect(IPAddress remoteaddr, int port, string secret)
|
||||
{
|
||||
var remoteEndPoint = new IPEndPoint(remoteaddr, port);
|
||||
innerSocket.Connect(remoteEndPoint);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the NetworkStream used to send and receive data.
|
||||
/// </summary>
|
||||
/// <returns>The underlying Stream instance that be used to send and receive data</returns>
|
||||
/// <remarks>
|
||||
/// GetStream returns a NetworkStream that you can use to send and receive data. You must close/dispose
|
||||
/// the NetworkStream by yourself. Closing DefaultSocketWrapper does not release the NetworkStream
|
||||
/// </remarks>
|
||||
public Stream GetStream()
|
||||
{
|
||||
return new NetworkStream(innerSocket);
|
||||
}
|
||||
private static byte[] ReceiveAll(Socket socket, int len)
|
||||
{
|
||||
var buffer = new List<byte>();
|
||||
|
||||
/// <summary>
|
||||
/// Returns a stream used to receive data only.
|
||||
/// </summary>
|
||||
/// <returns>The underlying Stream instance that be used to receive data</returns>
|
||||
public Stream GetInputStream()
|
||||
{
|
||||
// The default buffer size is 64K, PythonRDD also use 64K as default buffer size.
|
||||
var readBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerReadBufferSizeEnvName) ?? "65536");
|
||||
logger.LogDebug("Input stream buffer size: [{0}]", readBufferSize);
|
||||
return readBufferSize > 0 ? new BufferedStream(GetStream(), readBufferSize) : GetStream();
|
||||
}
|
||||
while (socket.Available > 0 && buffer.Count < len)
|
||||
{
|
||||
var currByte = new Byte[1];
|
||||
var byteCounter = socket.Receive(currByte, currByte.Length, SocketFlags.None);
|
||||
|
||||
/// <summary>
|
||||
/// Returns a stream used to send data only.
|
||||
/// </summary>
|
||||
/// <returns>The underlying Stream instance that be used to send data</returns>
|
||||
public Stream GetOutputStream()
|
||||
{
|
||||
// The default buffer size is 64K, PythonRDD also use 64K as default buffer size.
|
||||
var writeBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerWriteBufferSizeEnvName) ?? "65536");
|
||||
logger.LogDebug("Output stream buffer size: [{0}]", writeBufferSize);
|
||||
return writeBufferSize > 0 ? new BufferedStream(GetStream(), writeBufferSize) : GetStream();
|
||||
}
|
||||
if (byteCounter.Equals(1))
|
||||
{
|
||||
buffer.Add(currByte[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Starts listening for incoming connections requests
|
||||
/// </summary>
|
||||
/// <param name="backlog">The maximum length of the pending connections queue. </param>
|
||||
public void Listen(int backlog = 16)
|
||||
{
|
||||
innerSocket.Listen(backlog);
|
||||
}
|
||||
return buffer.ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Receives network data from this socket, and returns a ByteBuf that contains the received data.
|
||||
///
|
||||
/// The DefaultSocketWrapper does not support this function.
|
||||
/// </summary>
|
||||
/// <returns>A ByteBuf object that contains received data.</returns>
|
||||
public ByteBuf Receive()
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
/// <summary>
|
||||
/// Returns the NetworkStream used to send and receive data.
|
||||
/// </summary>
|
||||
/// <returns>The underlying Stream instance that can be used to send and receive data</returns>
|
||||
/// <remarks>
|
||||
/// GetStream returns a NetworkStream that you can use to send and receive data. You must close/dispose
|
||||
/// the NetworkStream by yourself. Closing DefaultSocketWrapper does not release the NetworkStream
|
||||
/// </remarks>
|
||||
public Stream GetStream()
|
||||
{
|
||||
return new NetworkStream(innerSocket);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Sends data to this socket with a ByteBuf object that contains data to be sent.
|
||||
///
|
||||
/// The DefaultSocketWrapper does not support this function.
|
||||
/// </summary>
|
||||
/// <param name="data">A ByteBuf object that contains data to be sent</param>
|
||||
public void Send(ByteBuf data)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
/// <summary>
|
||||
/// Returns a stream used to receive data only.
|
||||
/// </summary>
|
||||
/// <returns>The underlying Stream instance that can be used to receive data</returns>
|
||||
public Stream GetInputStream()
|
||||
{
|
||||
// The default buffer size is 64K, PythonRDD also use 64K as default buffer size.
|
||||
var readBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerReadBufferSizeEnvName) ?? "65536");
|
||||
logger.LogDebug("Input stream buffer size: [{0}]", readBufferSize);
|
||||
return readBufferSize > 0 ? new BufferedStream(GetStream(), readBufferSize) : GetStream();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Disposes the resources used by this instance of the DefaultSocket class.
|
||||
/// </summary>
|
||||
/// <param name="disposing"></param>
|
||||
protected virtual void Dispose(bool disposing)
|
||||
{
|
||||
if (disposing)
|
||||
{
|
||||
innerSocket.Dispose();
|
||||
}
|
||||
}
|
||||
/// <summary>
|
||||
/// Returns a stream used to send data only.
|
||||
/// </summary>
|
||||
/// <returns>The underlying Stream instance that can be used to send data</returns>
|
||||
public Stream GetOutputStream()
|
||||
{
|
||||
// The default buffer size is 64K, PythonRDD also use 64K as default buffer size.
|
||||
var writeBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerWriteBufferSizeEnvName) ?? "65536");
|
||||
logger.LogDebug("Output stream buffer size: [{0}]", writeBufferSize);
|
||||
return writeBufferSize > 0 ? new BufferedStream(GetStream(), writeBufferSize) : GetStream();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Releases all resources used by the current instance of the DefaultSocket class.
|
||||
/// </summary>
|
||||
public void Dispose()
|
||||
{
|
||||
Dispose(true);
|
||||
}
|
||||
/// <summary>
|
||||
/// Starts listening for incoming connections requests
|
||||
/// </summary>
|
||||
/// <param name="backlog">The maximum length of the pending connections queue. </param>
|
||||
public void Listen(int backlog = 16)
|
||||
{
|
||||
innerSocket.Listen(backlog);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Frees resources used by DefaultSocket class
|
||||
/// </summary>
|
||||
~DefaultSocketWrapper()
|
||||
{
|
||||
Dispose(false);
|
||||
}
|
||||
/// <summary>
|
||||
/// Receives network data from this socket, and returns a ByteBuf that contains the received data.
|
||||
///
|
||||
/// The DefaultSocketWrapper does not support this function.
|
||||
/// </summary>
|
||||
/// <returns>A ByteBuf object that contains received data.</returns>
|
||||
public ByteBuf Receive()
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Indicates whether there are data that has been received from the network and is available to be read.
|
||||
/// </summary>
|
||||
public bool HasData { get { return innerSocket.Available > 0; } }
|
||||
/// <summary>
|
||||
/// Sends data to this socket with a ByteBuf object that contains data to be sent.
|
||||
///
|
||||
/// The DefaultSocketWrapper does not support this function.
|
||||
/// </summary>
|
||||
/// <param name="data">A ByteBuf object that contains data to be sent</param>
|
||||
public void Send(ByteBuf data)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the local endpoint.
|
||||
/// </summary>
|
||||
public EndPoint LocalEndPoint { get { return innerSocket.LocalEndPoint; } }
|
||||
/// <summary>
|
||||
/// Disposes the resources used by this instance of the DefaultSocket class.
|
||||
/// </summary>
|
||||
/// <param name="disposing"></param>
|
||||
protected virtual void Dispose(bool disposing)
|
||||
{
|
||||
if (disposing)
|
||||
{
|
||||
innerSocket.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the remote endpoint if it has one.
|
||||
/// </summary>
|
||||
public EndPoint RemoteEndPoint { get { return innerSocket.RemoteEndPoint; } }
|
||||
}
|
||||
/// <summary>
|
||||
/// Releases all resources used by the current instance of the DefaultSocket class.
|
||||
/// </summary>
|
||||
public void Dispose()
|
||||
{
|
||||
Dispose(true);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Frees resources used by DefaultSocket class
|
||||
/// </summary>
|
||||
~DefaultSocketWrapper()
|
||||
{
|
||||
Dispose(false);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Indicates whether data has been received from the network and is available to be read.
|
||||
/// </summary>
|
||||
public bool HasData { get { return innerSocket.Available > 0; } }
|
||||
|
||||
/// <summary>
|
||||
/// Returns the local endpoint.
|
||||
/// </summary>
|
||||
public EndPoint LocalEndPoint { get { return innerSocket.LocalEndPoint; } }
|
||||
|
||||
/// <summary>
|
||||
/// Returns the remote endpoint if it has one.
|
||||
/// </summary>
|
||||
public EndPoint RemoteEndPoint { get { return innerSocket.RemoteEndPoint; } }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network
|
|||
/// ISocketWrapper interface defines the common methods to operate a socket (traditional socket or
|
||||
/// Windows Registered IO socket)
|
||||
/// </summary>
|
||||
internal interface ISocketWrapper : IDisposable
|
||||
public interface ISocketWrapper : IDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// Accepts a incoming connection request.
|
||||
|
@ -24,12 +24,13 @@ namespace Microsoft.Spark.CSharp.Network
|
|||
/// </summary>
|
||||
void Close();
|
||||
|
||||
/// <summary>
|
||||
/// Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
/// </summary>
|
||||
/// <param name="remoteaddr">The IP address of the remote host</param>
|
||||
/// <param name="port">The port number of the remote host</param>
|
||||
void Connect(IPAddress remoteaddr, int port);
|
||||
/// <summary>
|
||||
/// Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
/// </summary>
|
||||
/// <param name="remoteaddr">The IP address of the remote host</param>
|
||||
/// <param name="port">The port number of the remote host</param>
|
||||
/// <param name="secret">The secret to connect, can be null</param>
|
||||
void Connect(IPAddress remoteaddr, int port, string secret);
|
||||
|
||||
/// <summary>
|
||||
/// Returns a stream used to send and receive data.
|
||||
|
|
|
@ -151,7 +151,7 @@ namespace Microsoft.Spark.CSharp.Network
|
|||
/// </summary>
|
||||
/// <param name="remoteaddr">The IP address of the remote host</param>
|
||||
/// <param name="port">The port number of the remote host</param>
|
||||
public void Connect(IPAddress remoteaddr, int port)
|
||||
public void Connect(IPAddress remoteaddr, int port, string secret)
|
||||
{
|
||||
EnsureAccessible();
|
||||
|
||||
|
|
|
@ -111,7 +111,7 @@ namespace Microsoft.Spark.CSharp.Network
|
|||
/// </summary>
|
||||
/// <param name="remoteaddr">The IP address of the remote host</param>
|
||||
/// <param name="port">The port number of the remote host</param>
|
||||
public void Connect(IPAddress remoteaddr, int port)
|
||||
public void Connect(IPAddress remoteaddr, int port, string secret)
|
||||
{
|
||||
var remoteEndPoint = new IPEndPoint(remoteaddr, port);
|
||||
innerSocket.Connect(remoteEndPoint);
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Network
|
||||
{
|
||||
/// <summary>
/// Immutable pair of a socket port and the secret that goes with it.
/// </summary>
public class SocketInfo
{
    /// <summary>
    /// The port number of the socket.
    /// </summary>
    public readonly int Port;

    /// <summary>
    /// The secret associated with the socket; null when there is none.
    /// </summary>
    public readonly string Secret;

    /// <summary>
    /// Creates a SocketInfo from an already known port and secret.
    /// </summary>
    /// <param name="port">The port number of the socket</param>
    /// <param name="secret">The secret for the socket, can be null</param>
    public SocketInfo(int port, string secret)
    {
        Port = port;
        Secret = secret;
    }

    /// <summary>
    /// Builds a SocketInfo from a list of JVM object references whose first element
    /// stringifies to the port number and whose second element stringifies to the secret.
    /// </summary>
    /// <param name="o">The object to interpret; must be a List of JvmObjectReference with at least two elements</param>
    /// <returns>The parsed SocketInfo</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="o"/> is null</exception>
    /// <exception cref="ArgumentException">Thrown when the list has fewer than two elements or its first element is null</exception>
    /// <exception cref="Exception">Thrown when <paramref name="o"/> is not a List of JvmObjectReference</exception>
    public static SocketInfo Parse(object o)
    {
        // Check null first: the type-mismatch message below dereferences o, which would
        // otherwise surface as an unrelated NullReferenceException.
        if (o == null)
        {
            throw new ArgumentNullException("o", "Cannot parse socket info from a null object");
        }

        var references = o as List<JvmObjectReference>;
        if (references == null)
        {
            // Exception type and message are kept as they were for existing callers and logs.
            throw new Exception(o.ToString() + " is not socket info "+typeof(List<JvmObjectReference>)+" "+o.GetType());
        }

        if (references.Count < 2)
        {
            throw new ArgumentException(
                "Socket info requires a port and a secret but the list has " + references.Count + " element(s)", "o");
        }

        var portReference = references[0];
        if (portReference == null)
        {
            throw new ArgumentException("Socket info has a null port reference", "o");
        }

        // A missing secret reference maps to a null Secret instead of a NullReferenceException.
        var secretReference = references[1];
        var secret = secretReference == null ? null : secretReference.ObjectToString();

        return new SocketInfo(int.Parse(portReference.ObjectToString()), secret);
    }
}
|
||||
}
|
|
@ -13,7 +13,7 @@ namespace Microsoft.Spark.CSharp.Proxy
|
|||
IRDDProxy JavaToCSharp();
|
||||
string GetQueryExecution();
|
||||
string GetExecutedPlan();
|
||||
string GetShowString(int numberOfRows, bool truncate);
|
||||
string GetShowString(int numberOfRows, int truncate, bool vertical);
|
||||
bool IsLocal();
|
||||
IStructTypeProxy GetSchema();
|
||||
IRDDProxy ToJSON();
|
||||
|
@ -59,7 +59,9 @@ namespace Microsoft.Spark.CSharp.Proxy
|
|||
IDataFrameProxy Repartition(int numPartitions, IColumnProxy[] columns);
|
||||
IDataFrameProxy Repartition(IColumnProxy[] columns);
|
||||
IDataFrameProxy Sample(bool withReplacement, double fraction, long seed);
|
||||
IDataFrameWriterProxy Write();
|
||||
IDataFrameProxy Broadcast();
|
||||
|
||||
IDataFrameWriterProxy Write();
|
||||
}
|
||||
|
||||
internal interface IUDFProxy
|
||||
|
|
|
@ -7,6 +7,7 @@ using System.Linq;
|
|||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Proxy
|
||||
{
|
||||
|
@ -41,6 +42,6 @@ namespace Microsoft.Spark.CSharp.Proxy
|
|||
void SaveAsSequenceFile(string path, string compressionCodecClass);
|
||||
void SaveAsTextFile(string path, string compressionCodecClass);
|
||||
long Count();
|
||||
int CollectAndServe();
|
||||
SocketInfo CollectAndServe();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ using System.Text;
|
|||
using System.Threading.Tasks;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Interop;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Proxy
|
||||
|
@ -50,7 +51,7 @@ namespace Microsoft.Spark.CSharp.Proxy
|
|||
void CancelJobGroup(string groupId);
|
||||
void CancelAllJobs();
|
||||
IStatusTrackerProxy StatusTracker { get; }
|
||||
int RunJob(IRDDProxy rdd, IEnumerable<int> partitions);
|
||||
SocketInfo RunJob(IRDDProxy rdd, IEnumerable<int> partitions);
|
||||
IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId);
|
||||
IRDDProxy CreateCSharpRdd(IRDDProxy prefvJavaRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservePartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator);
|
||||
IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions, long partitionFuncId);
|
||||
|
|
|
@ -79,12 +79,12 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(executedPlanReference, "toString", new object[] { }).ToString();
|
||||
}
|
||||
|
||||
public string GetShowString(int numberOfRows, bool truncate)
|
||||
public string GetShowString(int numberOfRows, int truncate, bool vertical)
|
||||
{
|
||||
return
|
||||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
|
||||
jvmDataFrameReference, "showString",
|
||||
new object[] { numberOfRows, truncate }).ToString();
|
||||
new object[] { numberOfRows, truncate, vertical}).ToString();
|
||||
}
|
||||
|
||||
public bool IsLocal()
|
||||
|
@ -575,7 +575,16 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
new object[] { withReplacement, fraction, seed }).ToString()), sqlContextProxy);
|
||||
}
|
||||
|
||||
public IDataFrameWriterProxy Write()
|
||||
/// <summary>
/// Calls the static JVM method org.apache.spark.sql.functions.broadcast with this
/// DataFrame's JVM reference and wraps the returned reference in a new proxy.
/// </summary>
/// <returns>Proxy for the DataFrame reference returned by the JVM call</returns>
public IDataFrameProxy Broadcast()
{
    var broadcastResult = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.sql.functions", "broadcast",
        new object[] { jvmDataFrameReference });

    var broadcastReference = new JvmObjectReference(broadcastResult.ToString());
    return new DataFrameIpcProxy(broadcastReference, sqlContextProxy);
}
|
||||
|
||||
public IDataFrameWriterProxy Write()
|
||||
{
|
||||
return new DataFrameWriterIpcProxy(new JvmObjectReference(
|
||||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDataFrameReference, "write").ToString()));
|
||||
|
|
|
@ -12,6 +12,7 @@ using System.Threading.Tasks;
|
|||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Interop;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
||||
{
|
||||
|
@ -66,10 +67,10 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
return long.Parse(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(rdd, "count").ToString());
|
||||
}
|
||||
|
||||
public int CollectAndServe()
|
||||
public SocketInfo CollectAndServe()
|
||||
{
|
||||
var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "rdd"));
|
||||
return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd }).ToString());
|
||||
return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd }));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ using System.Threading.Tasks;
|
|||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Interop;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
using Microsoft.Spark.CSharp.Proxy.Ipc;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
||||
|
@ -134,10 +135,8 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
|
||||
public void Accumulator(int port)
|
||||
{
|
||||
jvmAccumulatorReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "accumulator",
|
||||
SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList"),
|
||||
SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorParam", IPAddress.Loopback.ToString(), port)
|
||||
));
|
||||
jvmAccumulatorReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorV2", IPAddress.Loopback.ToString(), port);
|
||||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkContextReference, "register", new object[] { jvmAccumulatorReference });
|
||||
}
|
||||
|
||||
public void Stop()
|
||||
|
@ -241,7 +240,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
|
||||
public void SetJobGroup(string groupId, string description, bool interruptOnCancel)
|
||||
{
|
||||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setCheckpointDir", new object[] { groupId, description, interruptOnCancel });
|
||||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setJobGroup", new object[] { groupId, description, interruptOnCancel });
|
||||
}
|
||||
|
||||
public void SetLocalProperty(string key, string value)
|
||||
|
@ -344,10 +343,10 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
|
||||
}
|
||||
|
||||
public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
|
||||
public SocketInfo RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
|
||||
{
|
||||
var jpartitions = JvmBridgeUtils.GetJavaList<int>(partitions);
|
||||
return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions }).ToString());
|
||||
return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions }));
|
||||
}
|
||||
|
||||
public IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId)
|
||||
|
|
|
@ -27,7 +27,9 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
}
|
||||
}
|
||||
|
||||
public ISqlContextProxy SqlContextProxy
|
||||
internal JvmObjectReference JvmReference => jvmSparkSessionReference;
|
||||
|
||||
public ISqlContextProxy SqlContextProxy
|
||||
{
|
||||
get { return sqlContextProxy; }
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
|
|||
|
||||
var udf = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.execution.python.UserDefinedPythonFunction", new object[]
|
||||
{
|
||||
name, function, dt
|
||||
name, function, dt, 100 /*BatchUDF*/, true /*deterministic*/
|
||||
});
|
||||
|
||||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(judf, "registerPython", new object[] { name, udf });
|
||||
|
|
|
@ -6,7 +6,9 @@ using System.Collections.Generic;
|
|||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Proxy;
|
||||
using Microsoft.Spark.CSharp.Proxy.Ipc;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Sql
|
||||
|
@ -66,10 +68,12 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if the collect and take methods can be run locally (without any Spark executors).
|
||||
/// </summary>
|
||||
public bool IsLocal
|
||||
internal JvmObjectReference JvmReference => (dataFrameProxy as DataFrameIpcProxy)?.JvmDataFrameReference;
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if the collect and take methods can be run locally (without any Spark executors).
|
||||
/// </summary>
|
||||
public bool IsLocal
|
||||
{
|
||||
get
|
||||
{
|
||||
|
@ -145,10 +149,11 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
/// </summary>
|
||||
/// <param name="numberOfRows">Number of rows to display - default 20</param>
|
||||
/// <param name="truncate">If greater than 0, strings longer than this many characters are truncated; 0 or less disables truncation - default 20</param>
|
||||
public void Show(int numberOfRows = 20, bool truncate = true)
|
||||
/// <param name="vertical">If set to True, print output rows vertically (one line per column value).</param>
|
||||
public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false)
|
||||
{
|
||||
logger.LogInfo("Writing {0} rows in the DataFrame to Console output", numberOfRows);
|
||||
Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate));
|
||||
Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate, vertical));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -166,8 +171,8 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
/// </summary>
|
||||
public IEnumerable<Row> Collect()
|
||||
{
|
||||
int port = RddProxy.CollectAndServe();
|
||||
return Rdd.Collect(port).Cast<Row>();
|
||||
var info = RddProxy.CollectAndServe();
|
||||
return Rdd.Collect(info).Cast<Row>();
|
||||
}
|
||||
|
||||
//TODO - add this method if needed to convert Row to collection of T
|
||||
|
@ -917,10 +922,11 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
/// <summary>
|
||||
/// Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
|
||||
/// </summary>
|
||||
/// <param name="type">Persist storage type</param>
|
||||
// Python API: https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py persist(self, storageLevel)
|
||||
public DataFrame Persist()
|
||||
public DataFrame Persist(StorageLevelType type= StorageLevelType.MEMORY_AND_DISK)
|
||||
{
|
||||
dataFrameProxy.Persist(StorageLevelType.MEMORY_AND_DISK);
|
||||
dataFrameProxy.Persist(type);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -944,6 +950,11 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
return Persist();
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns a new DataFrame built from the proxy returned by the underlying
/// proxy's Broadcast() call, sharing this DataFrame's SparkContext.
/// </summary>
public DataFrame Broadcast()
{
    var broadcastProxy = dataFrameProxy.Broadcast();
    return new DataFrame(broadcastProxy, sparkContext);
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns a new DataFrame that has exactly `numPartitions` partitions.
|
||||
/// </summary>
|
||||
|
|
|
@ -159,5 +159,18 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
logger.LogInfo("Constructing DataFrame using Parquet source {0}", string.Join(";", path));
|
||||
return new DataFrame(dataFrameReaderProxy.Parquet(path), sparkContext);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Loads an Avro file and returns the result as a DataFrame.
|
||||
///
|
||||
/// This function goes through the input once to determine the input schema. If you know the
|
||||
/// schema in advance, use the version that specifies the schema to avoid the extra scan.
|
||||
/// </summary>
|
||||
/// <param name="path">input path</param>
|
||||
public DataFrame Avro(string path)
{
    logger.LogInfo("Constructing DataFrame using AVRO source {0}", path);

    // Select the "com.databricks.spark.avro" format, then load from the given path.
    var avroReader = Format("com.databricks.spark.avro");
    return avroReader.Load(path);
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -170,5 +170,16 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
{
|
||||
Format("parquet").Save(path);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Saves the content of the DataFrame in AVRO format at the specified path.
|
||||
/// This is equivalent to:
|
||||
/// Format("com.databricks.spark.avro").Save(path)
|
||||
/// </summary>
|
||||
public void Avro(string path)
{
    // Select the "com.databricks.spark.avro" format, then save to the given path.
    var avroWriter = Format("com.databricks.spark.avro");
    avroWriter.Save(path);
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,7 +92,8 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
/// </summary>
|
||||
/// <param name="numberOfRows">Number of rows - default is 20</param>
|
||||
/// <param name="truncate">If greater than 0, strings longer than this many characters are truncated; 0 or less disables truncation - default is 20</param>
|
||||
public void Show(int numberOfRows = 20, bool truncate = true)
|
||||
/// <param name="vertical">If set to true, prints output rows vertically (one line per column value).</param>
|
||||
public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false)
{
    // Forward every argument. Leaving out `vertical` compiles (the callee has a
    // default) but silently ignores the caller's choice.
    ToDF().Show(numberOfRows, truncate, vertical);
}
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Runtime.Serialization;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
|
@ -1119,5 +1121,42 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
return input.Select(a => func((A1)(a[0]), (A2)(a[1]), (A3)(a[2]), (A4)(a[3]), (A5)(a[4]), (A6)(a[5]), (A7)(a[6]), (A8)(a[7]), (A9)(a[8]), (A10)(a[9]))).Cast<dynamic>();
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
||||
/// <summary>
/// Adapts a method described by a <see cref="MethodInfo"/> to the
/// (partition id, rows) -> results delegate shape by invoking it through
/// reflection once per input row. The method is invoked with a null target,
/// so it must be static.
/// </summary>
[Serializable]
internal class UdfReflectionHelper
{
    private readonly MethodInfo func;

    // Reusable argument array, sized to the method's parameter count. It is not
    // serialized, so Init re-creates it after deserialization.
    [NonSerialized]
    private object[] _cache;

    /// <summary>
    /// Creates a helper for the given method.
    /// </summary>
    /// <param name="f">Method to invoke for each input row</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="f"/> is null</exception>
    internal UdfReflectionHelper(MethodInfo f)
    {
        // Fail with a descriptive exception instead of a NullReferenceException
        // from the GetParameters() call below.
        if (f == null)
        {
            throw new ArgumentNullException(nameof(f));
        }

        func = f;
        _cache = new object[func.GetParameters().Length];
    }

    /// <summary>Return type of the wrapped method.</summary>
    public Type ReturnType => func.ReturnType;

    /// <summary>
    /// Deserialization callback: rebuilds the non-serialized argument array.
    /// </summary>
    [OnDeserialized()]
    public void Init(StreamingContext context)
    {
        _cache = new object[func.GetParameters().Length];
    }

    /// <summary>
    /// Lazily maps each input row to the result of invoking the wrapped method.
    /// </summary>
    /// <param name="pid">Partition id; not used by this implementation</param>
    /// <param name="input">Rows; each row is indexed positionally to obtain the method arguments</param>
    /// <returns>One result per input row, produced as the sequence is enumerated</returns>
    internal IEnumerable<dynamic> Execute(int pid, IEnumerable<dynamic> input)
    {
        return input.Select(Run).Cast<dynamic>();
    }

    // Copies the row's leading elements into the shared argument array and
    // invokes the method with a null target.
    private dynamic Run(dynamic input)
    {
        for (int i = 0; i < _cache.Length; ++i)
        {
            _cache[i] = input[i];
        }
        return func.Invoke(null, _cache);
    }
}
|
||||
#endregion
|
||||
}
|
||||
|
|
|
@ -18,17 +18,24 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
[NonSerialized]
|
||||
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(Row));
|
||||
|
||||
/// <summary>
|
||||
/// Number of elements in the Row.
|
||||
/// </summary>
|
||||
/// <returns>elements count in this row</returns>
|
||||
public abstract int Size();
|
||||
public abstract dynamic[] Values { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Number of elements in the Row.
|
||||
/// </summary>
|
||||
/// <returns>elements count in this row</returns>
|
||||
public abstract int Size();
|
||||
|
||||
/// <summary>
|
||||
/// Schema for the row.
|
||||
/// </summary>
|
||||
public abstract StructType GetSchema();
|
||||
|
||||
public virtual void ResetValues(dynamic[] values)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the value at position i.
|
||||
/// </summary>
|
||||
|
@ -80,8 +87,22 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
internal class RowImpl : Row
|
||||
{
|
||||
private readonly StructType schema;
|
||||
public dynamic[] Values { get { return values; } }
|
||||
private readonly dynamic[] values;
|
||||
|
||||
/// <summary>
/// Values of this row. On first access after construction or a reset, the raw
/// values are converted in place via the schema; later accesses return the
/// same array without converting again.
/// </summary>
public override dynamic[] Values
{
    get
    {
        if (valuesConverted)
        {
            return rawValues;
        }

        // Source and destination are the same array: conversion happens in place.
        schema.ConvertPickleObjects(rawValues,rawValues);
        valuesConverted = true;
        return rawValues;
    }
}
|
||||
|
||||
private dynamic[] rawValues;
|
||||
private bool valuesConverted = false;
|
||||
|
||||
private readonly int columnCount;
|
||||
|
||||
|
@ -96,11 +117,11 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
{
|
||||
if (data is dynamic[])
|
||||
{
|
||||
values = data as dynamic[];
|
||||
rawValues = data as dynamic[];
|
||||
}
|
||||
else if (data is List<dynamic>)
|
||||
{
|
||||
values = (data as List<dynamic>).ToArray();
|
||||
rawValues = (data as List<dynamic>).ToArray();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -109,17 +130,25 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
|
||||
this.schema = schema;
|
||||
|
||||
columnCount = values.Count();
|
||||
int schemaColumnCount = this.schema.Fields.Count();
|
||||
columnCount = rawValues.Length;
|
||||
int schemaColumnCount = this.schema.Fields.Count;
|
||||
if (columnCount != schemaColumnCount)
|
||||
{
|
||||
throw new Exception(string.Format("column count inferred from data ({0}) and schema ({1}) mismatch", columnCount, schemaColumnCount));
|
||||
}
|
||||
|
||||
Initialize();
|
||||
}
|
||||
|
||||
public override int Size()
|
||||
/// <summary>
/// Replaces the raw values of this row and marks them as not yet converted,
/// so the next read of Values converts them again.
/// </summary>
/// <param name="values">New raw values; must have exactly as many entries as the row has columns</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is null</exception>
/// <exception cref="ArgumentException">Thrown when the length of <paramref name="values"/> differs from the column count</exception>
public override void ResetValues(dynamic[] values)
{
    // Reject null explicitly; otherwise the length check below throws a
    // NullReferenceException.
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    if (columnCount != values.Length)
    {
        throw new ArgumentException("column count inferred from data and schema mismatch");
    }

    rawValues = values;
    valuesConverted = false;
}
|
||||
|
||||
public override int Size()
|
||||
{
|
||||
return columnCount;
|
||||
}
|
||||
|
@ -131,16 +160,15 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
|
||||
public override dynamic Get(int i)
|
||||
{
|
||||
if (i >= 0 && i < columnCount) return Values[i];
|
||||
if (i >= columnCount)
|
||||
{
|
||||
throw new Exception(string.Format("i ({0}) >= columnCount ({1})", i, columnCount));
|
||||
}
|
||||
else if(i < 0)
|
||||
else
|
||||
{
|
||||
throw new Exception(string.Format("i ({0}) < 0", i));
|
||||
}
|
||||
|
||||
return values[i];
|
||||
}
|
||||
|
||||
public override dynamic Get(string columnName)
|
||||
|
@ -152,7 +180,7 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
public override string ToString()
|
||||
{
|
||||
List<string> cols = new List<string>();
|
||||
foreach (var item in values)
|
||||
foreach (var item in Values)
|
||||
{
|
||||
if (item != null)
|
||||
{
|
||||
|
@ -166,73 +194,7 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
|
||||
return string.Format("[{0}]", string.Join(",", cols.ToArray()));
|
||||
}
|
||||
|
||||
|
||||
private void Initialize()
|
||||
{
|
||||
|
||||
int index = 0;
|
||||
foreach (var field in schema.Fields)
|
||||
{
|
||||
if (field.DataType is ArrayType)
|
||||
{
|
||||
Func<DataType, int, StructType> convertArrayTypeToStructTypeFunc = (dataType, length) =>
|
||||
{
|
||||
StructField[] fields = new StructField[length];
|
||||
for(int i = 0; i < length ; i++)
|
||||
{
|
||||
fields[i] = new StructField(string.Format("_array_{0}", i), dataType);
|
||||
}
|
||||
return new StructType(fields);
|
||||
};
|
||||
var elementType = (field.DataType as ArrayType).ElementType;
|
||||
|
||||
// Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
|
||||
// then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList;
|
||||
// In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[].
|
||||
object[] valueOfArray = values[index] is ArrayList ? (values[index] as ArrayList).ToArray() : values[index] as object[];
|
||||
if (valueOfArray == null)
|
||||
{
|
||||
throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
|
||||
}
|
||||
|
||||
values[index] = new RowImpl(valueOfArray, elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).values;
|
||||
}
|
||||
else if (field.DataType is MapType)
|
||||
{
|
||||
//TODO
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
else if (field.DataType is StructType)
|
||||
{
|
||||
dynamic value = values[index];
|
||||
if (value != null)
|
||||
{
|
||||
var subRow = new RowImpl(values[index], field.DataType as StructType);
|
||||
values[index] = subRow;
|
||||
}
|
||||
}
|
||||
else if (field.DataType is DecimalType)
|
||||
{
|
||||
//TODO
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
else if (field.DataType is DateType)
|
||||
{
|
||||
//TODO
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
else if (field.DataType is StringType)
|
||||
{
|
||||
if (values[index] != null) values[index] = values[index].ToString();
|
||||
}
|
||||
else
|
||||
{
|
||||
values[index] = values[index];
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -78,7 +78,7 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
currentSchema = null;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
//removes objects of type RowConstructor and replacing them with actual values
|
||||
private object[] GetValues(object[] arguments)
|
||||
{
|
||||
|
@ -86,7 +86,7 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
int i = 0;
|
||||
foreach (var argument in arguments)
|
||||
{
|
||||
if (argument != null && argument.GetType() == typeof(RowConstructor))
|
||||
if (argument is RowConstructor)
|
||||
{
|
||||
values[i++] = (argument as RowConstructor).Values;
|
||||
}
|
||||
|
|
|
@ -9,7 +9,9 @@ using System.Runtime.Remoting.Contexts;
|
|||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Proxy;
|
||||
using Microsoft.Spark.CSharp.Proxy.Ipc;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
using Microsoft.Spark.CSharp.Sql.Catalog;
|
||||
|
||||
|
@ -42,10 +44,12 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
get { return catalog ?? (catalog = new Catalog.Catalog(SparkSessionProxy.GetCatalog())); }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Interface through which the user may access the underlying SparkContext.
|
||||
/// </summary>
|
||||
public SparkContext SparkContext { get; private set; }
|
||||
internal JvmObjectReference JvmReference => (sparkSessionProxy as SparkSessionIpcProxy)?.JvmReference;
|
||||
|
||||
/// <summary>
|
||||
/// Interface through which the user may access the underlying SparkContext.
|
||||
/// </summary>
|
||||
public SparkContext SparkContext { get; private set; }
|
||||
|
||||
public UdfRegistration Udf
|
||||
{
|
||||
|
@ -114,18 +118,30 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
// The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]].
|
||||
// In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside.
|
||||
// It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]].
|
||||
var rddRow = rdd.Map(r => r);
|
||||
var rddRow = rdd.MapPartitions(r => r.Select(rr => rr));
|
||||
rddRow.serializedMode = SerializedMode.Row;
|
||||
|
||||
return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the specified table as a <see cref="DataFrame"/>
|
||||
/// </summary>
|
||||
/// <param name="tableName"></param>
|
||||
/// <returns></returns>
|
||||
public DataFrame Table(string tableName)
|
||||
public DataFrame CreateDataFrame(RDD<Row> rdd, StructType schema)
|
||||
{
|
||||
// Note: This is for pickling RDD, convert to RDD<byte[]> which happens in CSharpWorker.
|
||||
// The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]].
|
||||
// In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside.
|
||||
// It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]].
|
||||
var rddRow = rdd.MapPartitions(rows => rows.Select(r => r.Values));
|
||||
rddRow.serializedMode = SerializedMode.Row;
|
||||
|
||||
return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the specified table as a <see cref="DataFrame"/>
|
||||
/// </summary>
|
||||
/// <param name="tableName"></param>
|
||||
/// <returns></returns>
|
||||
public DataFrame Table(string tableName)
|
||||
{
|
||||
return new DataFrame(sparkSessionProxy.Table(tableName), SparkContext);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Reflection;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Proxy;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
|
@ -150,13 +151,25 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers the given <see cref="DataFrame"/> as a temporary table in the catalog.
|
||||
/// Temporary tables exist only during the lifetime of this instance of SqlContext.
|
||||
/// </summary>
|
||||
/// <param name="dataFrame"></param>
|
||||
/// <param name="tableName"></param>
|
||||
public void RegisterDataFrameAsTable(DataFrame dataFrame, string tableName)
|
||||
public DataFrame CreateDataFrame(RDD<Row> rdd, StructType schema)
|
||||
{
|
||||
// Note: This is for pickling RDD, convert to RDD<byte[]> which happens in CSharpWorker.
|
||||
// The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]].
|
||||
// In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside.
|
||||
// It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]].
|
||||
var rddRow = rdd.Map(r => r);
|
||||
rddRow.serializedMode = SerializedMode.Row;
|
||||
|
||||
return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers the given <see cref="DataFrame"/> as a temporary table in the catalog.
|
||||
/// Temporary tables exist only during the lifetime of this instance of SqlContext.
|
||||
/// </summary>
|
||||
/// <param name="dataFrame"></param>
|
||||
/// <param name="tableName"></param>
|
||||
public void RegisterDataFrameAsTable(DataFrame dataFrame, string tableName)
|
||||
{
|
||||
sqlContextProxy.RegisterDataFrameAsTable(dataFrame.DataFrameProxy, tableName);
|
||||
}
|
||||
|
@ -527,6 +540,14 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>(f).Execute;
|
||||
sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT)));
|
||||
}
|
||||
#endregion
|
||||
}
|
||||
|
||||
/// <summary>
/// Registers a static method, described by reflection metadata, as a UDF under the given name.
/// </summary>
/// <param name="name">Name to register the function under</param>
/// <param name="f">Static method to invoke for each input row</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="f"/> is null</exception>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="f"/> is not static</exception>
public void RegisterFunction(string name, MethodInfo f)
{
    if (f == null)
    {
        throw new ArgumentNullException(nameof(f));
    }

    // UdfReflectionHelper invokes the method with a null target, so an instance
    // method could only fail later, at execution time. Reject it up front.
    if (!f.IsStatic)
        throw new InvalidOperationException(f.DeclaringType?.FullName + "." + f.Name +
                                            " is not a static method, can't be registered");

    // The format string needs {1}; without it the method info argument is never logged.
    logger.LogInfo("Name of the function to register {0}, method info {1}", name, f.DeclaringType?.FullName + "." + f.Name);
    var helper = new UdfReflectionHelper(f);
    Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = helper.Execute;
    sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType));
}
|
||||
#endregion
|
||||
}
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -5,6 +5,7 @@ using System;
|
|||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
|
@ -249,6 +250,17 @@ namespace Microsoft.Spark.CSharp.Sql
|
|||
Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>(f).Execute;
|
||||
udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT)));
|
||||
}
|
||||
#endregion
|
||||
}
|
||||
|
||||
/// <summary>
/// Registers a static method, described by reflection metadata, as a UDF under the given name.
/// </summary>
/// <param name="name">Name to register the function under</param>
/// <param name="f">Static method to invoke for each input row</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="f"/> is null</exception>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="f"/> is not static</exception>
public void RegisterFunction(string name, MethodInfo f)
{
    if (f == null)
    {
        throw new ArgumentNullException(nameof(f));
    }

    if (!f.IsStatic)
        throw new InvalidOperationException(f.DeclaringType?.FullName + "." + f.Name +
                                            " is not a static method, can't be registered");

    // The format string needs {1}; without it the method info argument is never logged.
    logger.LogInfo("Name of the function to register {0}, method info {1}", name, f.DeclaringType?.FullName + "." + f.Name);
    var helper = new UdfReflectionHelper(f);
    Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> udfHelper = helper.Execute;
    udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType));
}
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="log4net" version="2.0.5" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
|
||||
<package id="log4net" version="2.0.8" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="11.0.2" targetFramework="net45" />
|
||||
<package id="Razorvine.Pyrolite" version="4.10.0" targetFramework="net45" />
|
||||
<package id="Razorvine.Serpent" version="1.12.0" targetFramework="net45" />
|
||||
</packages>
|
||||
</packages>
|
|
@ -3513,7 +3513,7 @@
|
|||
Close the socket connections and releases all associated resources.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.DefaultSocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.DefaultSocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
|
||||
<summary>
|
||||
Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
</summary>
|
||||
|
@ -3612,12 +3612,13 @@
|
|||
Close the ISocket connections and releases all associated resources.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.ISocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.ISocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
|
||||
<summary>
|
||||
Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
</summary>
|
||||
<param name="remoteaddr">The IP address of the remote host</param>
|
||||
<param name="port">The port number of the remote host</param>
|
||||
<param name="secret">The secret to connect, can be null</param>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.ISocketWrapper.GetStream">
|
||||
<summary>
|
||||
|
@ -3770,7 +3771,7 @@
|
|||
Close the ISocket connections and releases all associated resources.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.RioSocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.RioSocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
|
||||
<summary>
|
||||
Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
</summary>
|
||||
|
@ -3912,7 +3913,7 @@
|
|||
Close the ISocket connections and releases all associated resources.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.SaeaSocketWrapper.Connect(System.Net.IPAddress,System.Int32)">
|
||||
<member name="M:Microsoft.Spark.CSharp.Network.SaeaSocketWrapper.Connect(System.Net.IPAddress,System.Int32,System.String)">
|
||||
<summary>
|
||||
Establishes a connection to a remote host that is specified by an IP address and a port number
|
||||
</summary>
|
||||
|
@ -5190,12 +5191,13 @@
|
|||
</summary>
|
||||
<returns>row count</returns>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Show(System.Int32,System.Boolean)">
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Show(System.Int32,System.Int32,System.Boolean)">
|
||||
<summary>
|
||||
Displays rows of the DataFrame in tabular form
|
||||
</summary>
|
||||
<param name="numberOfRows">Number of rows to display - default 20</param>
|
||||
<param name="truncate">Indicates if strings more than 20 characters long will be truncated</param>
|
||||
<param name="vertical">If set to True, print output rows vertically (one line per column value).</param>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.ShowSchema">
|
||||
<summary>
|
||||
|
@ -5627,10 +5629,11 @@
|
|||
the 100 new partitions will claim 10 of the current partitions.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Persist">
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Persist(Microsoft.Spark.CSharp.Core.StorageLevelType)">
|
||||
<summary>
|
||||
Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
|
||||
</summary>
|
||||
<param name="type">Persist storage type</param>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrame.Unpersist(System.Boolean)">
|
||||
<summary>
|
||||
|
@ -6040,6 +6043,15 @@
|
|||
DataFrame if no paths are passed in.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrameReader.Avro(System.String)">
|
||||
<summary>
|
||||
Loads an AVRO file (one object per line) and returns the result as a DataFrame.
|
||||
|
||||
This function goes through the input once to determine the input schema. If you know the
|
||||
schema in advance, use the version that specifies the schema to avoid the extra scan.
|
||||
</summary>
|
||||
<param name="path">input path</param>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Sql.DataFrameWriter">
|
||||
<summary>
|
||||
Interface used to write a DataFrame to external storage systems (e.g. file systems,
|
||||
|
@ -6145,6 +6157,13 @@
|
|||
Format("parquet").Save(path)
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.DataFrameWriter.Avro(System.String)">
|
||||
<summary>
|
||||
Saves the content of the DataFrame in AVRO format at the specified path.
|
||||
This is equivalent to:
|
||||
Format("com.databricks.spark.avro").Save(path)
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Sql.Dataset">
|
||||
<summary>
|
||||
Dataset is a strongly typed collection of domain-specific objects that can be transformed
|
||||
|
@ -6193,13 +6212,14 @@
|
|||
Returns all column names as an array.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.Dataset.Show(System.Int32,System.Boolean)">
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.Dataset.Show(System.Int32,System.Int32,System.Boolean)">
|
||||
<summary>
|
||||
Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters
|
||||
will be truncated, and all cells will be aligned right.
|
||||
</summary>
|
||||
<param name="numberOfRows">Number of rows - default is 20</param>
|
||||
<param name="truncate">Indicates whether strings longer than 20 characters are truncated</param>
|
||||
<param name="vertical">If set to true, prints output rows vertically (one line per column value).</param>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Sql.Dataset.ShowSchema">
|
||||
<summary>
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -33,7 +33,7 @@ namespace AdapterTest
|
|||
// get accumulator server port and connect to accumulator server
|
||||
int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort;
|
||||
sock = SocketFactory.CreateSocket();
|
||||
sock.Connect(IPAddress.Loopback, serverPort);
|
||||
sock.Connect(IPAddress.Loopback, serverPort, null);
|
||||
}
|
||||
|
||||
[TearDown]
|
||||
|
|
|
@ -35,22 +35,25 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="Moq, Version=4.2.1510.2205, Culture=neutral, PublicKeyToken=69f491c39445e920, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Moq.4.2.1510.2205\lib\net40\Moq.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json">
|
||||
<HintPath>..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
<Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="nunit.framework, Version=3.0.5813.39031, Culture=neutral, PublicKeyToken=2638cd05610744eb, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Pyrolite">
|
||||
<Reference Include="Razorvine.Pyrolite, Version=4.10.0.26455, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Serpent">
|
||||
<Reference Include="Razorvine.Serpent, Version=1.12.0.35091, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
|
|
|
@ -12,6 +12,7 @@ using Microsoft.Spark.CSharp.Sql;
|
|||
using Microsoft.Spark.CSharp.Proxy;
|
||||
using NUnit.Framework;
|
||||
using Moq;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
|
||||
namespace AdapterTest
|
||||
{
|
||||
|
@ -65,10 +66,10 @@ namespace AdapterTest
|
|||
[Test]
|
||||
public void TestShow()
|
||||
{
|
||||
mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
|
||||
mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
|
||||
var dataFrame = new DataFrame(mockDataFrameProxy.Object, null);
|
||||
dataFrame.Show();
|
||||
mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once);
|
||||
mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once);
|
||||
}
|
||||
|
||||
[Test]
|
||||
|
@ -135,9 +136,9 @@ namespace AdapterTest
|
|||
var expectedRows = new Row[] {new MockRow(), new MockRow()};
|
||||
var mockRddProxy = new Mock<IRDDProxy>();
|
||||
var mockRddCollector = new Mock<IRDDCollector>();
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
.Returns(expectedRows);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123,null));
|
||||
mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
|
||||
mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
|
||||
var dataFrame = new DataFrame(mockDataFrameProxy.Object, null);
|
||||
|
@ -838,9 +839,9 @@ namespace AdapterTest
|
|||
var expectedRows = new Row[] {new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow()};
|
||||
var mockRddProxy = new Mock<IRDDProxy>();
|
||||
var mockRddCollector = new Mock<IRDDCollector>();
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
.Returns(expectedRows);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null));
|
||||
mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
|
||||
mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
|
||||
mockDataFrameProxy.Setup(m => m.Limit(It.IsAny<int>())).Returns(mockDataFrameProxy.Object);
|
||||
|
@ -868,9 +869,9 @@ namespace AdapterTest
|
|||
var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() };
|
||||
var mockRddProxy = new Mock<IRDDProxy>();
|
||||
var mockRddCollector = new Mock<IRDDCollector>();
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
.Returns(expectedRows);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null));
|
||||
mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
|
||||
mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
|
||||
mockDataFrameProxy.Setup(m => m.Limit(It.IsAny<int>())).Returns(mockDataFrameProxy.Object);
|
||||
|
@ -892,9 +893,9 @@ namespace AdapterTest
|
|||
var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() };
|
||||
var mockRddProxy = new Mock<IRDDProxy>();
|
||||
var mockRddCollector = new Mock<IRDDCollector>();
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<int>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
mockRddCollector.Setup(m => m.Collect(It.IsAny<SocketInfo>(), It.IsAny<SerializedMode>(), It.IsAny<Type>()))
|
||||
.Returns(expectedRows);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123);
|
||||
mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null));
|
||||
mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object);
|
||||
mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object);
|
||||
mockDataFrameProxy.Setup(m => m.Limit(It.IsAny<int>())).Returns(mockDataFrameProxy.Object);
|
||||
|
|
|
@ -38,12 +38,12 @@ namespace AdapterTest
|
|||
public void TestShow()
|
||||
{
|
||||
Mock<IDataFrameProxy> mockDataFrameProxy = new Mock<IDataFrameProxy>();
|
||||
mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
|
||||
mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
|
||||
mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object);
|
||||
|
||||
var dataset = new Dataset(mockDatasetProxy.Object);
|
||||
dataset.Show();
|
||||
mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once);
|
||||
mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once);
|
||||
}
|
||||
|
||||
[Test]
|
||||
|
|
|
@ -9,6 +9,7 @@ using System.Threading.Tasks;
|
|||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.IO;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Sql;
|
||||
using Razorvine.Pickle;
|
||||
using Microsoft.Spark.CSharp.Proxy;
|
||||
|
@ -64,7 +65,7 @@ namespace AdapterTest.Mocks
|
|||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public string GetShowString(int numberOfRows, bool truncate)
|
||||
public string GetShowString(int numberOfRows, int truncate, bool vertical)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
@ -240,7 +241,12 @@ namespace AdapterTest.Mocks
|
|||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public IDataFrameWriterProxy Write()
|
||||
public IDataFrameProxy Broadcast()
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public IDataFrameWriterProxy Write()
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
|
|
@ -4,12 +4,13 @@ using System.Linq;
|
|||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
|
||||
namespace AdapterTest.Mocks
|
||||
{
|
||||
class MockRDDCollector : IRDDCollector
|
||||
{
|
||||
public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
|
||||
public IEnumerable<dynamic> Collect(SocketInfo port, SerializedMode serializedMode, Type type)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ using Microsoft.Spark.CSharp.Core;
|
|||
using Microsoft.Spark.CSharp.Proxy;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using NUnit.Framework;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
|
||||
namespace AdapterTest.Mocks
|
||||
{
|
||||
|
@ -60,7 +61,7 @@ namespace AdapterTest.Mocks
|
|||
return union;
|
||||
}
|
||||
|
||||
public int CollectAndServe()
|
||||
public SocketInfo CollectAndServe()
|
||||
{
|
||||
return MockSparkContextProxy.RunJob(this);
|
||||
}
|
||||
|
|
|
@ -8,6 +8,13 @@ namespace AdapterTest.Mocks
|
|||
{
|
||||
public class MockRow : Row
|
||||
{
|
||||
public override dynamic[] Values
|
||||
{
|
||||
get
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
}
|
||||
|
||||
public override int Size()
|
||||
{
|
||||
|
|
|
@ -195,7 +195,7 @@ namespace AdapterTest.Mocks
|
|||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
internal static int RunJob(IRDDProxy rdd)
|
||||
internal static SocketInfo RunJob(IRDDProxy rdd)
|
||||
{
|
||||
var mockRdd = (rdd as MockRddProxy);
|
||||
IEnumerable<byte[]> result = mockRdd.pickle ? mockRdd.result.Cast<byte[]>() :
|
||||
|
@ -222,10 +222,12 @@ namespace AdapterTest.Mocks
|
|||
ns.Flush();
|
||||
}
|
||||
});
|
||||
return (listener.LocalEndPoint as IPEndPoint).Port;
|
||||
|
||||
SocketInfo socketInfo = new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null);
|
||||
return socketInfo;
|
||||
}
|
||||
|
||||
public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
|
||||
public SocketInfo RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
|
||||
{
|
||||
return RunJob(rdd);
|
||||
}
|
||||
|
|
|
@ -86,9 +86,9 @@ namespace AdapterTest
|
|||
Assert.Throws<InvalidOperationException>(() => clientSock.GetStream());
|
||||
Assert.Throws<InvalidOperationException>(() => clientSock.Receive());
|
||||
Assert.Throws<InvalidOperationException>(() => clientSock.Send(null));
|
||||
Assert.Throws<SocketException>(() => clientSock.Connect(IPAddress.Any, 1024));
|
||||
Assert.Throws<SocketException>(() => clientSock.Connect(IPAddress.Any, 1024, null));
|
||||
|
||||
clientSock.Connect(IPAddress.Loopback, port);
|
||||
clientSock.Connect(IPAddress.Loopback, port, null);
|
||||
|
||||
// Valid invalid operation
|
||||
var byteBuf = ByteBufPool.Default.Allocate();
|
||||
|
|
|
@ -80,7 +80,7 @@ namespace AdapterTest
|
|||
ns.Flush();
|
||||
}
|
||||
});
|
||||
return (listener.LocalEndPoint as IPEndPoint).Port;
|
||||
return new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null);
|
||||
});
|
||||
_mockRddProxy.Setup(m => m.RDDCollector).Returns(new RDDCollector());
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="log4net" version="2.0.8" targetFramework="net45" />
|
||||
<package id="Moq" version="4.2.1510.2205" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="11.0.2" targetFramework="net45" />
|
||||
<package id="NUnit" version="3.0.1" targetFramework="net45" />
|
||||
<package id="NUnit.Console" version="3.0.1" developmentDependency="true" />
|
||||
<package id="OpenCover" version="4.6.166" targetFramework="net45" developmentDependency="true" />
|
||||
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
|
||||
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
|
||||
<package id="OpenCover" version="4.6.166" targetFramework="net45" developmentDependency="true" />
|
||||
</packages>
|
|
@ -34,6 +34,9 @@
|
|||
<Prefer32Bit>false</Prefer32Bit>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.CodeAnalysis, Version=1.2.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.dll</HintPath>
|
||||
|
@ -50,11 +53,13 @@
|
|||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.Scripting.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Pyrolite, Version=4.10.0.26455, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Serpent">
|
||||
<Reference Include="Razorvine.Serpent, Version=1.12.0.35091, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="log4net" version="2.0.5" targetFramework="net45" />
|
||||
<package id="log4net" version="2.0.8" targetFramework="net461" />
|
||||
<package id="Microsoft.Net.Compilers" version="1.1.1" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
|
||||
<package id="Razorvine.Pyrolite" version="4.10.0" targetFramework="net45" />
|
||||
<package id="Razorvine.Serpent" version="1.12.0" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="11.0.2" targetFramework="net461" />
|
||||
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net461" />
|
||||
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net461" />
|
||||
</packages>
|
|
@ -1867,5 +1867,72 @@ namespace Microsoft.Spark.CSharp.Samples
|
|||
SparkCLRSamples.FileSystemHelper.DeleteDirectory(path, true);
|
||||
Console.WriteLine("Remove directory: {0}", path);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Single UDF Sample
|
||||
/// </summary>
|
||||
[Sample]
|
||||
internal static void SingleUDFSample()
|
||||
{
|
||||
var sqlContext = GetSqlContext();
|
||||
var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson));
|
||||
peopleDataFrame.RegisterTempTable("peopleDataFrame");
|
||||
|
||||
sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; });
|
||||
|
||||
var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, 20) > 60");
|
||||
|
||||
rowSet.Show();
|
||||
|
||||
if (SparkCLRSamples.Configuration.IsValidationEnabled)
|
||||
{
|
||||
Assert.AreEqual(rowSet.Count() ,2);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Single UDF Sample with duplicate values
|
||||
/// </summary>
|
||||
[Sample]
|
||||
internal static void SingleUDFWithDupSample()
|
||||
{
|
||||
var sqlContext = GetSqlContext();
|
||||
var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson));
|
||||
peopleDataFrame.RegisterTempTable("peopleDataFrame");
|
||||
|
||||
sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; });
|
||||
|
||||
var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, age) < 50");
|
||||
|
||||
rowSet.Show();
|
||||
|
||||
if (SparkCLRSamples.Configuration.IsValidationEnabled)
|
||||
{
|
||||
Assert.AreEqual(rowSet.Count(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Multiple UDFs sample
|
||||
/// </summary>
|
||||
[Sample]
|
||||
internal static void MultipleUDFSample()
|
||||
{
|
||||
var sqlContext = GetSqlContext();
|
||||
var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson));
|
||||
peopleDataFrame.RegisterTempTable("peopleDataFrame");
|
||||
|
||||
sqlContext.RegisterFunction("UDF1", (int x, int y) => { return x + y; });
|
||||
sqlContext.RegisterFunction("UDF2", (string name, string id) => { return name + ":" + id; });
|
||||
|
||||
var rowSet = sqlContext.Sql("SELECT id, name, UDF1(age, 20) AS UDF1, UDF2(name, id) AS UDF2 FROM peopleDataFrame where UDF1(age, 20) > 60");
|
||||
|
||||
rowSet.Show();
|
||||
|
||||
if (SparkCLRSamples.Configuration.IsValidationEnabled)
|
||||
{
|
||||
Assert.AreEqual(rowSet.Count(), 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,8 +66,10 @@ namespace Microsoft.Spark.CSharp.Samples
|
|||
|
||||
if (Configuration.IsValidationEnabled && !status)
|
||||
{
|
||||
Environment.Exit(1);
|
||||
Environment.Exit(2);
|
||||
}
|
||||
|
||||
Environment.Exit(1);
|
||||
}
|
||||
|
||||
// Creates and returns a context
|
||||
|
|
|
@ -33,9 +33,11 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="Newtonsoft.Json, Version=7.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
<Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="nunit.framework, Version=3.0.5813.39031, Culture=neutral, PublicKeyToken=2638cd05610744eb, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
|
||||
<package id="log4net" version="2.0.8" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="11.0.2" targetFramework="net45" />
|
||||
<package id="NUnit" version="3.0.1" targetFramework="net45" />
|
||||
</packages>
|
|
@ -36,11 +36,10 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="Newtonsoft.Json">
|
||||
<HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Pyrolite">
|
||||
<HintPath>..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
|
||||
</Reference>
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Proxy.Ipc;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Utils.FileSystem
|
||||
{
|
||||
/// <summary>
|
||||
/// See https://hadoop.apache.org/docs/r2.6.1/api/org/apache/hadoop/fs/FileStatus.html
|
||||
/// </summary>
|
||||
public class HdfsFileStatus
|
||||
{
|
||||
public long Length => _status.Value.Length;
|
||||
public long ModificationTime => _status.Value.Time;
|
||||
public string Owner => _status.Value.Owner;
|
||||
public string Path => _status.Value.Path;
|
||||
public bool IsFile => _status.Value.IsFile;
|
||||
public bool IsDirectory => _status.Value.IsDirectory;
|
||||
public bool IsSymlink => _status.Value.IsSymlink;
|
||||
|
||||
private Lazy<Status> _status;
|
||||
|
||||
internal HdfsFileStatus(JvmObjectReference obj)
|
||||
{
|
||||
_status = new Lazy<Status>(()=>new Status(obj));
|
||||
}
|
||||
|
||||
private class Status
|
||||
{
|
||||
public long Length;
|
||||
public long Time;
|
||||
public string Owner;
|
||||
public string Path;
|
||||
public bool IsFile;
|
||||
public bool IsDirectory;
|
||||
public bool IsSymlink;
|
||||
|
||||
public Status(JvmObjectReference obj)
|
||||
{
|
||||
Length = (long) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getLen");
|
||||
Time = (long)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getModificationTime");
|
||||
Owner = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getOwner");
|
||||
IsFile = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isFile");
|
||||
IsDirectory = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isDirectory");
|
||||
IsSymlink = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isSymlink");
|
||||
var pr = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getPath"));
|
||||
Path = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pr, "getName");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -4,8 +4,11 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
using Microsoft.Spark.CSharp.Interop;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Proxy.Ipc;
|
||||
using Microsoft.Spark.CSharp.Utils.FileSystem;
|
||||
|
||||
namespace Microsoft.Spark.CSharp.Utils
|
||||
{
|
||||
|
@ -18,7 +21,7 @@ namespace Microsoft.Spark.CSharp.Utils
|
|||
|
||||
public HdfsFileSystemHelper()
|
||||
{
|
||||
var jvmConfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration");
|
||||
var jvmConfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration");
|
||||
jvmHdfsReference = new JvmObjectReference((string) SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.hadoop.fs.FileSystem", "get", jvmConfReference));
|
||||
}
|
||||
|
||||
|
@ -39,16 +42,25 @@ namespace Microsoft.Spark.CSharp.Utils
|
|||
for (var i = 0; i < statusList.Count; i++)
|
||||
{
|
||||
var subPathJvmReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(statusList[i], "getPath"));
|
||||
files[i] = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPathJvmReference, "getName");
|
||||
files[i] = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPathJvmReference, "getName");
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Build a temp file path under '/tmp' path on HDFS.
|
||||
/// </summary>
|
||||
public string GetTempFileName()
|
||||
/// <summary>
|
||||
/// List the status of every entry under the given path.
|
||||
/// </summary>
|
||||
public IEnumerable<HdfsFileStatus> ListStatus(string path)
|
||||
{
|
||||
var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
|
||||
return ((List<JvmObjectReference>)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "listStatus", pathJvmReference)).Select(r=>new HdfsFileStatus(r));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Build a temp file path under '/tmp' path on HDFS.
|
||||
/// </summary>
|
||||
public string GetTempFileName()
|
||||
{
|
||||
return "/tmp/" + Guid.NewGuid().ToString("N");
|
||||
}
|
||||
|
@ -91,5 +103,37 @@ namespace Microsoft.Spark.CSharp.Utils
|
|||
var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
|
||||
return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "delete", pathJvmReference, recursive);
|
||||
}
|
||||
}
|
||||
|
||||
public bool IsFile(string path)
|
||||
{
|
||||
var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
|
||||
return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isFile", pathJvmReference);
|
||||
}
|
||||
|
||||
public bool IsDirectory(string path)
|
||||
{
|
||||
var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
|
||||
return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isDirectory", pathJvmReference);
|
||||
}
|
||||
|
||||
public bool Touch(string path)
|
||||
{
|
||||
var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
|
||||
return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "createNewFile", pathJvmReference);
|
||||
}
|
||||
|
||||
public void CopyFromLocalFile(string src, string dest)
|
||||
{
|
||||
var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", new Uri(src).AbsoluteUri);
|
||||
var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", dest);
|
||||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyFromLocalFile", from, to);
|
||||
}
|
||||
|
||||
/// <summary>
/// Invokes "copyToLocalFile" on the JVM-side file system object.
/// </summary>
/// <param name="src">Source path string, passed to the org.apache.hadoop.fs.Path constructor unchanged</param>
/// <param name="dest">Destination location; it is parsed with <see cref="Uri"/> and its AbsoluteUri is what gets sent to the JVM</param>
public void CopyToLocalFile(string src, string dest)
{
    // The destination Path is built first, exactly as before, so that a malformed
    // destination fails before any Path object is created for the source.
    string destinationUri = new Uri(dest).AbsoluteUri;
    var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", destinationUri);

    var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", src);

    SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyToLocalFile", from, to);
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
<Reference Include="Microsoft.CSharp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="FileSystem\HdfsFileStatus.cs" />
|
||||
<Compile Include="FileSystem\LocalFileSystemHelper.cs" />
|
||||
<Compile Include="FileSystem\HdfsFileSystemHelper.cs" />
|
||||
<Compile Include="FileSystem\FileSystemHelper.cs" />
|
||||
|
|
|
@ -111,7 +111,8 @@ namespace Microsoft.Spark.CSharp
|
|||
|
||||
bool sparkReuseWorker = false;
|
||||
string envVar = Environment.GetEnvironmentVariable("SPARK_REUSE_WORKER"); // this envVar is set in JVM side
|
||||
if ((envVar != null) && envVar.Equals("1"))
|
||||
var secret = Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET");
|
||||
if ((envVar != null) && envVar.Equals("1"))
|
||||
{
|
||||
sparkReuseWorker = true;
|
||||
}
|
||||
|
@ -130,7 +131,7 @@ namespace Microsoft.Spark.CSharp
|
|||
SerDe.Write(s, trId); // write taskRunnerId to JVM side
|
||||
s.Flush();
|
||||
}
|
||||
TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker);
|
||||
TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker, secret);
|
||||
waitingTaskRunners.Add(taskRunner);
|
||||
taskRunnerRegistry[trId] = taskRunner;
|
||||
trId++;
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using Microsoft.Spark.CSharp.Configuration;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
|
@ -13,106 +15,116 @@ using Microsoft.Spark.CSharp.Services;
|
|||
[assembly: InternalsVisibleTo("WorkerTest")]
|
||||
namespace Microsoft.Spark.CSharp
|
||||
{
|
||||
/// <summary>
|
||||
/// TaskRunner is used to run Spark task assigned by JVM side. It uses a TCP socket to
|
||||
/// communicate with JVM side. This socket may be reused to run multiple Spark tasks.
|
||||
/// </summary>
|
||||
internal class TaskRunner
|
||||
{
|
||||
private static ILoggerService logger;
|
||||
private static ILoggerService Logger
|
||||
{
|
||||
get
|
||||
{
|
||||
if (logger != null) return logger;
|
||||
logger = LoggerServiceFactory.GetLogger(typeof(TaskRunner));
|
||||
return logger;
|
||||
}
|
||||
}
|
||||
/// <summary>
|
||||
/// TaskRunner is used to run Spark task assigned by JVM side. It uses a TCP socket to
|
||||
/// communicate with JVM side. This socket may be reused to run multiple Spark tasks.
|
||||
/// </summary>
|
||||
internal class TaskRunner
|
||||
{
|
||||
private static ILoggerService logger;
|
||||
private static ILoggerService Logger
|
||||
{
|
||||
get
|
||||
{
|
||||
if (logger != null) return logger;
|
||||
logger = LoggerServiceFactory.GetLogger(typeof(TaskRunner));
|
||||
return logger;
|
||||
}
|
||||
}
|
||||
|
||||
private readonly ISocketWrapper socket; // Socket to communicate with JVM
|
||||
private volatile bool stop;
|
||||
private readonly bool socketReuse; // whether the socket can be reused to run multiple Spark tasks
|
||||
private readonly ISocketWrapper socket; // Socket to communicate with JVM
|
||||
private volatile bool stop;
|
||||
private readonly bool socketReuse; // whether the socket can be reused to run multiple Spark tasks
|
||||
private string secret;
|
||||
|
||||
/// <summary>
|
||||
/// Task runner Id
|
||||
/// </summary>
|
||||
public int TaskId { get; private set; }
|
||||
/// <summary>
|
||||
/// Task runner Id
|
||||
/// </summary>
|
||||
public int TaskId { get; private set; }
|
||||
|
||||
public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse)
|
||||
{
|
||||
TaskId = trId;
|
||||
this.socket = socket;
|
||||
this.socketReuse = socketReuse;
|
||||
}
|
||||
public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse, string secret)
|
||||
{
|
||||
TaskId = trId;
|
||||
this.socket = socket;
|
||||
this.socketReuse = socketReuse;
|
||||
this.secret = secret;
|
||||
}
|
||||
|
||||
public void Run()
|
||||
{
|
||||
Logger.LogInfo("TaskRunner [{0}] is running ...", TaskId);
|
||||
public void Run()
|
||||
{
|
||||
Logger.LogInfo("TaskRunner [{0}] is running ...", TaskId);
|
||||
|
||||
try
|
||||
{
|
||||
while (!stop)
|
||||
{
|
||||
using (var inputStream = socket.GetInputStream())
|
||||
using (var outputStream = socket.GetOutputStream())
|
||||
{
|
||||
byte[] bytes = SerDe.ReadBytes(inputStream, sizeof(int));
|
||||
if (bytes != null)
|
||||
{
|
||||
int splitIndex = SerDe.ToInt(bytes);
|
||||
bool readComplete = Worker.ProcessStream(inputStream, outputStream, splitIndex);
|
||||
outputStream.Flush();
|
||||
if (!readComplete) // if the socket is not read through completely, then it can't be reused
|
||||
{
|
||||
stop = true;
|
||||
// wait for server to complete, otherwise server may get 'connection reset' exception
|
||||
Logger.LogInfo("Sleep 500 millisecond to close socket ...");
|
||||
Thread.Sleep(500);
|
||||
}
|
||||
else if (!socketReuse)
|
||||
{
|
||||
stop = true;
|
||||
// wait for server to complete, otherwise server gets 'connection reset' exception
|
||||
// Use SerDe.ReadBytes() to detect java side has closed socket properly
|
||||
// ReadBytes() will block until the socket is closed
|
||||
Logger.LogInfo("waiting JVM side to close socket...");
|
||||
SerDe.ReadBytes(inputStream);
|
||||
Logger.LogInfo("JVM side has closed socket");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
stop = true;
|
||||
Logger.LogWarn("read null splitIndex, socket is closed by JVM");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
stop = true;
|
||||
Logger.LogError("TaskRunner [{0}] exeption, will dispose this TaskRunner", TaskId);
|
||||
Logger.LogException(e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
try
|
||||
{
|
||||
socket.Close();
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.LogWarn("close socket exception: {0}", ex);
|
||||
}
|
||||
Logger.LogInfo("TaskRunner [{0}] finished", TaskId);
|
||||
}
|
||||
}
|
||||
try
|
||||
{
|
||||
while (!stop)
|
||||
{
|
||||
using (var inputStream = socket.GetInputStream())
|
||||
using (var outputStream = socket.GetOutputStream())
|
||||
{
|
||||
if (!string.IsNullOrEmpty(secret))
|
||||
{
|
||||
SerDe.Write(outputStream, secret);
|
||||
outputStream.Flush();
|
||||
var reply = SerDe.ReadString(inputStream);
|
||||
Logger.LogDebug("Connect back to JVM: " + reply);
|
||||
secret = null;
|
||||
}
|
||||
byte[] bytes = SerDe.ReadBytes(inputStream, sizeof(int));
|
||||
if (bytes != null)
|
||||
{
|
||||
int splitIndex = SerDe.ToInt(bytes);
|
||||
bool readComplete = Worker.ProcessStream(inputStream, outputStream, splitIndex);
|
||||
outputStream.Flush();
|
||||
if (!readComplete) // if the socket is not read through completely, then it can't be reused
|
||||
{
|
||||
stop = true;
|
||||
// wait for server to complete, otherwise server may get 'connection reset' exception
|
||||
Logger.LogInfo("Sleep 500 millisecond to close socket ...");
|
||||
Thread.Sleep(500);
|
||||
}
|
||||
else if (!socketReuse)
|
||||
{
|
||||
stop = true;
|
||||
// wait for server to complete, otherwise server gets 'connection reset' exception
|
||||
// Use SerDe.ReadBytes() to detect java side has closed socket properly
|
||||
// ReadBytes() will block until the socket is closed
|
||||
Logger.LogInfo("waiting JVM side to close socket...");
|
||||
SerDe.ReadBytes(inputStream);
|
||||
Logger.LogInfo("JVM side has closed socket");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
stop = true;
|
||||
Logger.LogWarn("read null splitIndex, socket is closed by JVM");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
stop = true;
|
||||
Logger.LogError("TaskRunner [{0}] exeption, will dispose this TaskRunner", TaskId);
|
||||
Logger.LogException(e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
try
|
||||
{
|
||||
socket.Close();
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.LogWarn("close socket exception: {0}", ex);
|
||||
}
|
||||
Logger.LogInfo("TaskRunner [{0}] finished", TaskId);
|
||||
}
|
||||
}
|
||||
|
||||
public void Stop()
|
||||
{
|
||||
Logger.LogInfo("try to stop TaskRunner [{0}]", TaskId);
|
||||
stop = true;
|
||||
}
|
||||
}
|
||||
public void Stop()
|
||||
{
|
||||
Logger.LogInfo("try to stop TaskRunner [{0}]", TaskId);
|
||||
stop = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,391 @@
|
|||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
using Microsoft.Spark.CSharp.Sql;
|
||||
using Razorvine.Pickle;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Runtime.Serialization;
|
||||
using System.Runtime.Serialization.Formatters.Binary;
|
||||
|
||||
namespace Microsoft.Spark.CSharp
|
||||
{
|
||||
/// <summary>
|
||||
/// This class execute user defined methods.
|
||||
/// </summary>
|
||||
|
||||
internal class UDFCommand
|
||||
{
|
||||
private readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
|
||||
private ILoggerService logger;
|
||||
private Stream inputStream;
|
||||
private Stream outputStream;
|
||||
private int splitIndex;
|
||||
private DateTime bootTime;
|
||||
private string deserializerMode;
|
||||
private string serializerMode;
|
||||
private IFormatter formatter;
|
||||
private Stopwatch commandProcessWatch;
|
||||
private int isSqlUdf;
|
||||
private List<WorkerFunc> workerFuncList;
|
||||
private int stageId;
|
||||
|
||||
/// <summary>
/// Captures everything needed to run one command and then initializes the logger.
/// </summary>
/// <param name="inputStream">Stream the input messages are read from</param>
/// <param name="outputStream">Stream the serialized results and timing data are written to</param>
/// <param name="splitIndex">Split index forwarded to every worker function</param>
/// <param name="bootTime">Boot timestamp reported back in the timing data</param>
/// <param name="deserializerMode">Name of the SerializedMode used to decode input</param>
/// <param name="serializerMode">Name of the SerializedMode used to encode output</param>
/// <param name="formatter">Formatter used for output modes without a dedicated encoder</param>
/// <param name="commandProcessWatch">Stopwatch that is stopped and logged when execution ends</param>
/// <param name="isSqlUdf">0 selects the non-SQL execution path, any other value the SQL UDF path</param>
/// <param name="workerFuncList">Worker functions to run</param>
/// <param name="stageId">Stage id, used for logging only</param>
public UDFCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime,
    string deserializerMode, string serializerMode, IFormatter formatter,
    Stopwatch commandProcessWatch, int isSqlUdf, List<WorkerFunc> workerFuncList, int stageId)
{
    // I/O endpoints
    this.inputStream = inputStream;
    this.outputStream = outputStream;

    // wire format
    this.deserializerMode = deserializerMode;
    this.serializerMode = serializerMode;
    this.formatter = formatter;

    // what to run
    this.isSqlUdf = isSqlUdf;
    this.workerFuncList = workerFuncList;
    this.splitIndex = splitIndex;

    // diagnostics
    this.stageId = stageId;
    this.bootTime = bootTime;
    this.commandProcessWatch = commandProcessWatch;

    InitializeLogger();
}
|
||||
|
||||
/// <summary>
/// Sets up the logger for this command. Any failure is written to the console
/// and terminates the process with exit code -1.
/// </summary>
private void InitializeLogger()
{
    try
    {
        // log4net is only selected when the process has an exe.config file on disk
        var configFile = AppDomain.CurrentDomain.SetupInformation.ConfigurationFile;
        bool hasConfigFile = File.Exists(configFile);
        if (hasConfigFile)
        {
            LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance);
        }

        logger = LoggerServiceFactory.GetLogger(typeof(UDFCommand));
    }
    catch (Exception e)
    {
        Console.WriteLine("InitializeLogger exception {0}, will exit", e);
        Environment.Exit(-1);
    }
}
|
||||
|
||||
/// <summary>
/// Runs the command: the SQL UDF path when isSqlUdf is non-zero, otherwise the non-SQL path.
/// </summary>
internal void Execute()
{
    bool sqlUdf = isSqlUdf != 0;
    if (sqlUdf)
    {
        ExecuteSqlUDF();
        return;
    }

    ExecuteNonSqlUDF();
}
|
||||
|
||||
/// <summary>
/// Runs the first worker function in workerFuncList over the whole input iterator,
/// writes every non-null result to the output stream, then writes the timing data
/// and logs statistics.
/// </summary>
/// <remarks>
/// funcProcessWatch measures the time spent producing messages and is paused while a
/// message is being written. It is resumed on the null-message path as well, so the
/// time needed to produce the message that follows a null one is not lost.
/// </remarks>
private void ExecuteNonSqlUDF()
{
    int count = 0;
    int nullMessageCount = 0;
    logger.LogDebug("Beginning to execute non sql func");
    WorkerFunc workerFunc = workerFuncList[0];
    var func = workerFunc.CharpWorkerFunc.Func;

    var funcProcessWatch = Stopwatch.StartNew();
    DateTime initTime = DateTime.UtcNow;
    foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf)))
    {
        funcProcessWatch.Stop();

        if (object.ReferenceEquals(null, message))
        {
            nullMessageCount++;
            // Resume timing before the enumerator is advanced again; previously the
            // watch stayed stopped here and the next message was produced untimed.
            funcProcessWatch.Start();
            continue;
        }

        try
        {
            WriteOutput(outputStream, serializerMode, message, formatter);
        }
        catch (Exception ex)
        {
            logger.LogError("WriteOutput() failed at iteration {0}, execption {1}", count, ex);
            throw;
        }

        count++;
        funcProcessWatch.Start();
    }

    logger.LogInfo("Output entries count: " + count);
    logger.LogDebug("Null messages count: " + nullMessageCount);

    WriteDiagnosticsInfo(outputStream, bootTime, initTime);

    commandProcessWatch.Stop();

    // log statistics
    logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds);
    logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds);
}
|
||||
|
||||
/// <summary>
/// Runs every worker function in workerFuncList against each input row and writes
/// one output entry per row, followed by the timing data.
/// </summary>
/// <remarks>
/// For each row, the arguments of a function are picked from the row by the function's
/// ArgOffsets. Null results are counted and dropped. When exactly one result remains
/// it is written on its own; otherwise the results are written as an array.
/// funcProcessWatch runs while rows are read and functions are evaluated and is paused
/// only while a row's output is written.
/// </remarks>
private void ExecuteSqlUDF()
{
    int count = 0;
    int nullMessageCount = 0;
    logger.LogDebug("Beginning to execute sql func");

    var funcProcessWatch = Stopwatch.StartNew();
    DateTime initTime = DateTime.UtcNow;

    foreach (var row in GetIterator(inputStream, deserializerMode, isSqlUdf))
    {
        List<Object> messages = new List<Object>();

        foreach (WorkerFunc workerFunc in workerFuncList)
        {
            List<Object> args = new List<Object>();
            foreach (int offset in workerFunc.ArgOffsets)
            {
                args.Add(row[offset]);
            }

            foreach (var message in workerFunc.CharpWorkerFunc.Func(splitIndex, new[] { args.ToArray()}))
            {
                if (object.ReferenceEquals(null, message))
                {
                    nullMessageCount++;
                    continue;
                }

                messages.Add(message);
            }
        }

        // Pause timing only for the write. The watch used to be stopped inside the
        // loop above, which left every function after the first one untimed.
        funcProcessWatch.Stop();

        try
        {
            dynamic res = messages.ToArray();
            if (messages.Count == 1)
            {
                res = messages[0];
            }

            WriteOutput(outputStream, serializerMode, res, formatter);
        }
        catch (Exception ex)
        {
            logger.LogError("WriteOutput() failed at iteration {0}, exception error {1}", count, ex.Message);
            throw;
        }

        count++;
        funcProcessWatch.Start();
    }

    logger.LogInfo("Output entries count: " + count);
    logger.LogDebug("Null messages count: " + nullMessageCount);

    WriteDiagnosticsInfo(outputStream, bootTime, initTime);

    commandProcessWatch.Stop();

    // log statistics
    logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds);
    // The second placeholder must be {1}; with {0} twice the stage id was printed
    // in place of the elapsed time.
    logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds);
}
|
||||
|
||||
/// <summary>
/// Serializes one message and writes it to the stream as a length prefix followed by the payload.
/// </summary>
/// <param name="networkStream">Stream to write to</param>
/// <param name="serializerMode">Name of the SerializedMode used to encode the message</param>
/// <param name="message">Message to encode</param>
/// <param name="formatter">Formatter used when the mode has no dedicated encoder</param>
/// <exception cref="InvalidOperationException">
/// The message serialized to null. This case was previously logged and then failed
/// with a NullReferenceException on the next line.
/// </exception>
private void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter)
{
    // Typed explicitly: with 'var' the result of a call taking a dynamic argument is
    // itself dynamic, which defers overload resolution of the writes below to runtime.
    byte[] buffer = GetSerializedMessage(serializerMode, message, formatter);
    if (buffer == null)
    {
        logger.LogError("Buffer is null");
        throw new InvalidOperationException(
            string.Format("Serializing the message in mode '{0}' produced a null buffer", serializerMode));
    }

    if (buffer.Length <= 0)
    {
        // An empty payload is reported but still written, as before.
        logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length);
    }

    SerDe.Write(networkStream, buffer.Length);
    SerDe.Write(networkStream, buffer);
}
|
||||
|
||||
/// <summary>
/// Encodes a message into bytes according to the given serialized mode.
/// </summary>
/// <param name="serializerMode">Name of a SerializedMode member</param>
/// <param name="message">Message to encode</param>
/// <param name="formatter">Formatter used for every mode other than None, String and Row</param>
/// <returns>
/// None: the message viewed as byte[] (null if it is not one).
/// String: SerDe.ToBytes of the message viewed as string.
/// Row: the pickled form of an ArrayList holding the message.
/// Any other mode: the output of the formatter.
/// </returns>
private byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter)
{
    var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode);

    switch (mode)
    {
        case SerializedMode.None:
            return message as byte[];

        case SerializedMode.String:
            return SerDe.ToBytes(message as string);

        case SerializedMode.Row:
            var pickler = new Pickler();
            return pickler.dumps(new ArrayList { message });

        default:
            try
            {
                var ms = new MemoryStream();
                formatter.Serialize(ms, message);
                return ms.ToArray();
            }
            catch (Exception ex)
            {
                // Log the failure together with the offending type, then let it propagate.
                logger.LogError("Exception serializing output: " + ex);
                logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName);
                throw;
            }
    }
}
|
||||
|
||||
/// <summary>
/// Writes the timing section to the stream: the TIMING_DATA marker, the boot, init and
/// finish times as Unix milliseconds, then two zero longs for the spill counters.
/// </summary>
/// <param name="networkStream">Stream to write to</param>
/// <param name="bootTime">Boot timestamp</param>
/// <param name="initTime">Timestamp taken just before execution started</param>
private void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime)
{
    const string format = "MM/dd/yyyy hh:mm:ss.fff tt";
    DateTime finishTime = DateTime.UtcNow;

    string bootText = bootTime.ToString(format);
    string initText = initTime.ToString(format);
    string finishText = finishTime.ToString(format);
    logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}", bootText, initText, finishText);

    SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA);
    foreach (DateTime stamp in new[] { bootTime, initTime, finishTime })
    {
        SerDe.Write(networkStream, ToUnixTime(stamp));
    }

    SerDe.Write(networkStream, 0L); //shuffle.MemoryBytesSpilled
    SerDe.Write(networkStream, 0L); //shuffle.DiskBytesSpilled
}
|
||||
|
||||
/// <summary>
/// Converts a timestamp to whole milliseconds elapsed since UnixTimeEpoch (fraction truncated).
/// </summary>
/// <param name="dt">Timestamp to convert</param>
/// <returns>Milliseconds between UnixTimeEpoch and <paramref name="dt"/></returns>
private long ToUnixTime(DateTime dt)
{
    TimeSpan sinceEpoch = dt.Subtract(UnixTimeEpoch);
    return (long)sinceEpoch.TotalMilliseconds;
}
|
||||
|
||||
/// <summary>
/// Lazily reads length-prefixed messages from the stream until END_OF_DATA_SECTION is
/// read, and yields each one decoded according to the serialized mode.
/// </summary>
/// <param name="inputStream">Stream to read from</param>
/// <param name="serializedMode">Name of a SerializedMode member</param>
/// <param name="isFuncSqlUdf">
/// In Row mode only: 0 yields the result of RowConstructor.GetRow() for every unpickled
/// item, any other value yields the unpickled objects as they are
/// </param>
/// <returns>
/// String: decoded strings, or null for a NULL-length message.
/// Row: one element per unpickled object.
/// Pair: Tuple&lt;byte[], byte[]&gt; of key and value bytes.
/// None: the raw bytes.
/// Any other mode: the BinaryFormatter-deserialized object, or null for a NULL-length message.
/// </returns>
/// <remarks>
/// A length that is neither positive nor SpecialLengths.NULL is skipped without yielding.
/// The stopwatch is running only while this method reads from the stream; the total is
/// logged once the data section has been consumed.
/// </remarks>
private IEnumerable<dynamic> GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf)
{
    logger.LogInfo("Serialized mode in GetIterator: " + serializedMode);
    IFormatter formatter = new BinaryFormatter();
    var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode);
    int messageLength;
    Stopwatch watch = Stopwatch.StartNew();

    while ((messageLength = SerDe.ReadInt(inputStream)) != (int)SpecialLengths.END_OF_DATA_SECTION)
    {
        watch.Stop();
        if (messageLength > 0 || messageLength == (int)SpecialLengths.NULL)
        {
            watch.Start();
            // A NULL length carries no payload, so there is nothing to read.
            byte[] buffer = messageLength > 0 ? SerDe.ReadBytes(inputStream, messageLength) : null;
            watch.Stop();
            switch (mode)
            {
                case SerializedMode.String:
                    {
                        if (messageLength > 0)
                        {
                            if (buffer == null)
                            {
                                logger.LogDebug("Buffer is null. Message length is {0}", messageLength);
                            }
                            yield return SerDe.ToString(buffer);
                        }
                        else
                        {
                            yield return null;
                        }
                        break;
                    }

                case SerializedMode.Row:
                    {
                        Debug.Assert(messageLength > 0);
                        var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer);

                        if (isFuncSqlUdf == 0)
                        {
                            foreach (var row in unpickledObjects.Select(item => (item as RowConstructor).GetRow()))
                            {
                                yield return row;
                            }
                        }
                        else
                        {
                            foreach (var row in unpickledObjects)
                            {
                                yield return row;
                            }
                        }

                        break;
                    }

                case SerializedMode.Pair:
                    {
                        // The message just read is the key; the value follows with its own length prefix.
                        byte[] pairKey = buffer;
                        byte[] pairValue;

                        watch.Start();
                        int valueLength = SerDe.ReadInt(inputStream);
                        if (valueLength > 0)
                        {
                            pairValue = SerDe.ReadBytes(inputStream, valueLength);
                        }
                        else if (valueLength == (int)SpecialLengths.NULL)
                        {
                            pairValue = null;
                        }
                        else
                        {
                            throw new Exception(string.Format("unexpected valueLength: {0}", valueLength));
                        }
                        watch.Stop();

                        yield return new Tuple<byte[], byte[]>(pairKey, pairValue);
                        break;
                    }

                case SerializedMode.None: //just return raw bytes
                    {
                        yield return buffer;
                        break;
                    }

                default:
                    {
                        if (buffer != null)
                        {
                            var ms = new MemoryStream(buffer);
                            yield return formatter.Deserialize(ms);
                        }
                        else
                        {
                            yield return null;
                        }
                        break;
                    }
            }
        }
        watch.Start();
    }

    logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds);
}
|
||||
}
|
||||
}
|
|
@ -2,7 +2,6 @@
|
|||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
|
@ -17,8 +16,6 @@ using Microsoft.Spark.CSharp.Core;
|
|||
using Microsoft.Spark.CSharp.Interop.Ipc;
|
||||
using Microsoft.Spark.CSharp.Network;
|
||||
using Microsoft.Spark.CSharp.Services;
|
||||
using Microsoft.Spark.CSharp.Sql;
|
||||
using Razorvine.Pickle;
|
||||
|
||||
namespace Microsoft.Spark.CSharp
|
||||
{
|
||||
|
@ -31,7 +28,6 @@ namespace Microsoft.Spark.CSharp
|
|||
/// </summary>
|
||||
public class Worker
|
||||
{
|
||||
private static readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
|
||||
private static ILoggerService logger;
|
||||
private static SparkCLRAssemblyHandler assemblyHandler;
|
||||
|
||||
|
@ -81,11 +77,13 @@ namespace Microsoft.Spark.CSharp
|
|||
InitializeLogger();
|
||||
logger.LogInfo("RunSimpleWorker ...");
|
||||
PrintFiles();
|
||||
|
||||
int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM
|
||||
logger.LogDebug("Port number used to pipe in/out data between JVM and CLR {0}", javaPort);
|
||||
//int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM
|
||||
var javaPort = int.Parse(Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT"));
|
||||
var secret = Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET");
|
||||
logger.LogDebug("Port and secret number used to pipe in/out data between JVM and CLR {0} {1}", javaPort, secret);
|
||||
var socket = InitializeSocket(javaPort);
|
||||
TaskRunner taskRunner = new TaskRunner(0, socket, false);
|
||||
//Microsoft.Spark.CSharp.Network.Utils.DoServerAuth(socket, secret);
|
||||
TaskRunner taskRunner = new TaskRunner(0, socket, false, secret);
|
||||
taskRunner.Run();
|
||||
}
|
||||
catch (Exception e)
|
||||
|
@ -119,7 +117,7 @@ namespace Microsoft.Spark.CSharp
|
|||
private static ISocketWrapper InitializeSocket(int javaPort)
|
||||
{
|
||||
var socket = SocketFactory.CreateSocket();
|
||||
socket.Connect(IPAddress.Loopback, javaPort);
|
||||
socket.Connect(IPAddress.Loopback, javaPort, null);
|
||||
return socket;
|
||||
}
|
||||
|
||||
|
@ -138,9 +136,13 @@ namespace Microsoft.Spark.CSharp
|
|||
//// initialize global state
|
||||
//shuffle.MemoryBytesSpilled = 0
|
||||
//shuffle.DiskBytesSpilled = 0
|
||||
SerDe.ReadInt(inputStream);
|
||||
SerDe.ReadInt(inputStream);
|
||||
SerDe.ReadInt(inputStream);
|
||||
SerDe.ReadLong(inputStream);
|
||||
|
||||
// fetch name of workdir
|
||||
string sparkFilesDir = SerDe.ReadString(inputStream);
|
||||
// fetch name of workdir
|
||||
string sparkFilesDir = SerDe.ReadString(inputStream);
|
||||
logger.LogDebug("spark_files_dir: " + sparkFilesDir);
|
||||
//SparkFiles._root_directory = sparkFilesDir
|
||||
//SparkFiles._is_running_on_worker = True
|
||||
|
@ -149,7 +151,7 @@ namespace Microsoft.Spark.CSharp
|
|||
|
||||
ProcessBroadcastVariables(inputStream);
|
||||
|
||||
Accumulator.threadLocalAccumulatorRegistry = new Dictionary<int, Accumulator>();
|
||||
Accumulator.threadLocalAccumulatorRegistry = new Dictionary<int, Accumulator>();
|
||||
|
||||
var formatter = ProcessCommand(inputStream, outputStream, splitIndex, bootTime);
|
||||
|
||||
|
@ -255,96 +257,119 @@ namespace Microsoft.Spark.CSharp
|
|||
logger.LogDebug("Is func Sql UDF = {0}", isSqlUdf);
|
||||
|
||||
IFormatter formatter = new BinaryFormatter();
|
||||
UDFCommand command = null;
|
||||
|
||||
if (isSqlUdf == 0)
|
||||
{
|
||||
logger.LogDebug("Processing non-UDF command");
|
||||
int lengthOfCommandByteArray = SerDe.ReadInt(inputStream);
|
||||
logger.LogDebug("Command length: " + lengthOfCommandByteArray);
|
||||
|
||||
if (lengthOfCommandByteArray > 0)
|
||||
{
|
||||
var commandProcessWatch = new Stopwatch();
|
||||
commandProcessWatch.Start();
|
||||
|
||||
int stageId;
|
||||
string deserializerMode;
|
||||
string serializerMode;
|
||||
CSharpWorkerFunc workerFunc;
|
||||
ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
|
||||
out workerFunc);
|
||||
|
||||
ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, workerFunc, serializerMode,
|
||||
formatter, commandProcessWatch, stageId, isSqlUdf);
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-(");
|
||||
}
|
||||
command = ProcessNonUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf);
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.LogDebug("Processing UDF command");
|
||||
var udfCount = SerDe.ReadInt(inputStream);
|
||||
logger.LogDebug("Count of UDFs = {0}", udfCount);
|
||||
command = ProcessUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf);
|
||||
}
|
||||
|
||||
if (udfCount == 1)
|
||||
{
|
||||
CSharpWorkerFunc func = null;
|
||||
var argCount = SerDe.ReadInt(inputStream);
|
||||
logger.LogDebug("Count of args = {0}", argCount);
|
||||
|
||||
var argOffsets = new List<int>();
|
||||
|
||||
for (int argIndex = 0; argIndex < argCount; argIndex++)
|
||||
{
|
||||
var offset = SerDe.ReadInt(inputStream);
|
||||
logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset);
|
||||
argOffsets.Add(offset);
|
||||
}
|
||||
var chainedFuncCount = SerDe.ReadInt(inputStream);
|
||||
logger.LogDebug("Count of chained func = {0}", chainedFuncCount);
|
||||
|
||||
var commandProcessWatch = new Stopwatch();
|
||||
int stageId = -1;
|
||||
string deserializerMode = null;
|
||||
string serializerMode = null;
|
||||
for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++)
|
||||
{
|
||||
int lengthOfCommandByteArray = SerDe.ReadInt(inputStream);
|
||||
logger.LogDebug("UDF command length: " + lengthOfCommandByteArray)
|
||||
;
|
||||
|
||||
if (lengthOfCommandByteArray > 0)
|
||||
{
|
||||
CSharpWorkerFunc workerFunc;
|
||||
ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
|
||||
out workerFunc);
|
||||
|
||||
func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc);
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.LogWarn("UDF lengthOfCommandByteArray = 0. Nothing to execute :-(");
|
||||
}
|
||||
}
|
||||
|
||||
Debug.Assert(stageId != -1);
|
||||
Debug.Assert(deserializerMode != null);
|
||||
Debug.Assert(serializerMode != null);
|
||||
Debug.Assert(func != null);
|
||||
ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, func, serializerMode, formatter,
|
||||
commandProcessWatch, stageId, isSqlUdf);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new NotSupportedException(); //TODO - add support for multiple UDFs
|
||||
}
|
||||
if (command != null)
|
||||
{
|
||||
command.Execute();
|
||||
}
|
||||
|
||||
return formatter;
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads a single non-UDF command from the stream and wraps it in a UDFCommand.
/// </summary>
/// <returns>
/// The command to execute, or null when the command length read from the stream is not positive
/// </returns>
private static UDFCommand ProcessNonUdfCommand(Stream inputStream, Stream outputStream, int splitIndex,
    DateTime bootTime, IFormatter formatter, int isSqlUdf)
{
    logger.LogDebug("Processing non-UDF command");
    int commandLength = SerDe.ReadInt(inputStream);
    logger.LogDebug("Command length: " + commandLength);

    if (commandLength <= 0)
    {
        logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-(");
        return null;
    }

    // Timing of the command starts here, before the command body is read.
    var commandTimer = Stopwatch.StartNew();

    int stageId;
    string deserializerMode;
    string serializerMode;
    CSharpWorkerFunc cSharpWorkerFunc;
    ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
        out cSharpWorkerFunc);

    // The single function is wrapped with an argument count of 0 and no offsets.
    var singleFunc = new List<WorkerFunc>() { new WorkerFunc(cSharpWorkerFunc, 0, null) };

    return new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode,
        serializerMode, formatter, commandTimer, isSqlUdf, singleFunc, stageId);
}
|
||||
|
||||
/// <summary>
/// Reads the description of one or more SQL UDFs from the stream and wraps them in a UDFCommand.
/// </summary>
/// <remarks>
/// For every UDF the stream provides an argument count, that many argument offsets, a
/// chained-function count and then that many commands, which are chained in the order
/// they are read. The stage id and the serializer modes passed to the UDFCommand are
/// those of the last command read.
/// The command stopwatch is started here. It used to be created but never started, so
/// the command process time logged for SQL UDFs was always 0, unlike the non-UDF path.
/// </remarks>
/// <returns>The command to execute</returns>
private static UDFCommand ProcessUdfCommand(Stream inputStream, Stream outputStream, int splitIndex,
    DateTime bootTime, IFormatter formatter, int isSqlUdf)
{
    logger.LogDebug("Processing UDF command");
    var udfCount = SerDe.ReadInt(inputStream);
    logger.LogDebug("Count of UDFs = {0}", udfCount);

    int stageId = -1;
    string deserializerMode = null;
    string serializerMode = null;
    var commandProcessWatch = Stopwatch.StartNew();
    List<WorkerFunc> workerFuncList = new List<WorkerFunc>();

    for (int udfIter = 0; udfIter < udfCount; udfIter++)
    {
        CSharpWorkerFunc func = null;
        var argCount = SerDe.ReadInt(inputStream);
        logger.LogDebug("Count of args = {0}", argCount);

        List<int> argOffsets = new List<int>();
        for (int argIndex = 0; argIndex < argCount; argIndex++)
        {
            var offset = SerDe.ReadInt(inputStream);
            logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset);
            argOffsets.Add(offset);
        }

        var chainedFuncCount = SerDe.ReadInt(inputStream);
        logger.LogDebug("Count of chained func = {0}", chainedFuncCount);

        for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++)
        {
            int lengthOfCommandByteArray = SerDe.ReadInt(inputStream);
            logger.LogDebug("UDF command length: " + lengthOfCommandByteArray);

            if (lengthOfCommandByteArray > 0)
            {
                CSharpWorkerFunc workerFunc;
                ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
                    out workerFunc);

                // The first function stands alone; each later one is chained onto the result so far.
                func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc);
            }
            else
            {
                logger.LogWarn("UDF lengthOfCommandByteArray = 0. Nothing to execute :-(");
            }
        }

        Debug.Assert(stageId != -1);
        Debug.Assert(deserializerMode != null);
        Debug.Assert(serializerMode != null);
        Debug.Assert(func != null);

        workerFuncList.Add(new WorkerFunc(func, argCount, argOffsets));
    }

    return new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode,
        serializerMode, formatter, commandProcessWatch, isSqlUdf, workerFuncList, stageId);
}
|
||||
|
||||
private static void ReadCommand(Stream networkStream, IFormatter formatter, out int stageId,
|
||||
out string deserializerMode,
|
||||
out string serializerMode, out CSharpWorkerFunc workerFunc)
|
||||
|
@ -388,116 +413,7 @@ namespace Microsoft.Spark.CSharp
|
|||
"--------------------------------------------------------------------------------------------------------------");
|
||||
logger.LogDebug(sb.ToString());
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the worker function over the deserialized input stream, writes every
/// non-null result to the output stream, then emits timing diagnostics and logs
/// processing statistics.
/// </summary>
/// <param name="inputStream">Stream the input records are read from (via GetIterator).</param>
/// <param name="outputStream">Stream the serialized results and diagnostics are written to.</param>
/// <param name="splitIndex">Value passed as the first argument of the worker function.</param>
/// <param name="bootTime">Boot timestamp forwarded to WriteDiagnosticsInfo.</param>
/// <param name="deserializerMode">Name of the SerializedMode used to decode input.</param>
/// <param name="workerFunc">Wrapper whose Func delegate is executed.</param>
/// <param name="serializerMode">Name of the SerializedMode used to encode output.</param>
/// <param name="formatter">Formatter used by WriteOutput for modes without a dedicated encoding.</param>
/// <param name="commandProcessWatch">Stopwatch started by the caller; stopped here once diagnostics are written.</param>
/// <param name="stageId">Stage id, used only for logging.</param>
/// <param name="isSqlUdf">Flag forwarded to GetIterator.</param>
private static void ExecuteCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime,
    string deserializerMode, CSharpWorkerFunc workerFunc, string serializerMode,
    IFormatter formatter, Stopwatch commandProcessWatch, int stageId, int isSqlUdf)
{
    int count = 0;
    int nullMessageCount = 0;
    logger.LogDebug("Beginning to execute func");
    var func = workerFunc.Func;

    // funcProcessWatch measures time spent inside the worker function only: it is
    // paused while a result is being written and resumed before the next one is pulled.
    var funcProcessWatch = Stopwatch.StartNew();
    DateTime initTime = DateTime.UtcNow;
    foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf)))
    {
        funcProcessWatch.Stop();

        if (object.ReferenceEquals(null, message))
        {
            nullMessageCount++;
            // Resume timing before pulling the next result; without this the watch
            // would stay stopped after a null message and under-report func time.
            funcProcessWatch.Start();
            continue;
        }

        try
        {
            WriteOutput(outputStream, serializerMode, message, formatter);
        }
        catch (Exception)
        {
            // Record which output entry failed, then let the caller handle the error.
            logger.LogError("WriteOutput() failed at iteration {0}", count);
            throw;
        }

        count++;
        funcProcessWatch.Start();
    }

    logger.LogInfo("Output entries count: " + count);
    logger.LogDebug("Null messages count: " + nullMessageCount);

    //if profiler:
    //    profiler.profile(process)
    //else:
    //    process()

    WriteDiagnosticsInfo(outputStream, bootTime, initTime);

    commandProcessWatch.Stop();

    // log statistics
    logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds);
    logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds);
}
|
||||
|
||||
/// <summary>
/// Serializes a single message and writes it to the stream as a length prefix
/// followed by the payload bytes.
/// </summary>
/// <param name="networkStream">Stream to write to.</param>
/// <param name="serializerMode">Name of the SerializedMode used to encode the message.</param>
/// <param name="message">Message to serialize.</param>
/// <param name="formatter">Formatter passed through to GetSerializedMessage.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when serialization yields no buffer. Previously this case was logged and
/// then failed on the following <c>buffer.Length</c> access with an opaque
/// null-dereference error.
/// </exception>
private static void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter)
{
    byte[] buffer = GetSerializedMessage(serializerMode, message, formatter);
    if (buffer == null)
    {
        logger.LogError("Buffer is null");
        throw new InvalidOperationException(
            "Serialized output buffer is null for serializer mode '" + serializerMode + "'");
    }

    if (buffer.Length <= 0)
    {
        // An empty payload is reported but still written, as before.
        logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length);
    }

    SerDe.Write(networkStream, buffer.Length);
    SerDe.Write(networkStream, buffer);
}
|
||||
|
||||
/// <summary>
/// Converts a message to its byte representation according to the serializer mode.
/// </summary>
/// <param name="serializerMode">Name of a SerializedMode member; parsed with Enum.Parse.</param>
/// <param name="message">Message to serialize.</param>
/// <param name="formatter">Formatter used for every mode other than None, String and Row.</param>
/// <returns>
/// The serialized bytes. May be null in None mode when the message is not a byte array
/// (the <c>as</c> cast yields null).
/// </returns>
private static byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter)
{
    byte[] buffer;

    switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode))
    {
        case SerializedMode.None:
            // Message is expected to already be raw bytes.
            buffer = message as byte[];
            break;

        case SerializedMode.String:
            buffer = SerDe.ToBytes(message as string);
            break;

        case SerializedMode.Row:
            var pickler = new Pickler();
            buffer = pickler.dumps(new ArrayList { message });
            break;

        default:
            try
            {
                // Dispose the intermediate stream once its content has been copied out.
                using (var ms = new MemoryStream())
                {
                    formatter.Serialize(ms, message);
                    buffer = ms.ToArray();
                }
            }
            catch (Exception)
            {
                logger.LogError("Exception serializing output");
                // Guard the type lookup: calling GetType() on a null message would throw
                // here and hide the original serialization failure.
                object failedMessage = message;
                if (failedMessage != null)
                {
                    Type messageType = failedMessage.GetType();
                    logger.LogError("{0} : {1}", messageType.Name, messageType.FullName);
                }
                else
                {
                    logger.LogError("{0} : {1}", "null", "null");
                }
                throw;
            }
            break;
    }

    return buffer;
}
|
||||
|
||||
|
||||
private static int ReadDiagnosticsInfo(Stream networkStream)
|
||||
{
|
||||
int rddId = SerDe.ReadInt(networkStream);
|
||||
|
@ -505,22 +421,7 @@ namespace Microsoft.Spark.CSharp
|
|||
int partitionId = SerDe.ReadInt(networkStream);
|
||||
logger.LogInfo("rddInfo: rddId {0}, stageId {1}, partitionId {2}", rddId, stageId, partitionId);
|
||||
return stageId;
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes the timing section to the stream: the TIMING_DATA marker, the boot, init
/// and finish times converted with ToUnixTime, and two zero-valued spill counters.
/// </summary>
/// <param name="networkStream">Stream to write to.</param>
/// <param name="bootTime">Boot timestamp supplied by the caller.</param>
/// <param name="initTime">Init timestamp supplied by the caller.</param>
private static void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime)
{
    // The finish time is captured first, before any logging or writing happens.
    DateTime completedAt = DateTime.UtcNow;
    const string timestampPattern = "MM/dd/yyyy hh:mm:ss.fff tt";
    logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}",
        bootTime.ToString(timestampPattern),
        initTime.ToString(timestampPattern),
        completedAt.ToString(timestampPattern));

    SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA);
    foreach (DateTime timestamp in new[] { bootTime, initTime, completedAt })
    {
        SerDe.Write(networkStream, ToUnixTime(timestamp));
    }

    const long nothingSpilled = 0L;
    SerDe.Write(networkStream, nothingSpilled); //shuffle.MemoryBytesSpilled
    SerDe.Write(networkStream, nothingSpilled); //shuffle.DiskBytesSpilled
}
|
||||
}
|
||||
|
||||
private static void WriteAccumulatorValues(Stream networkStream, IFormatter formatter)
|
||||
{
|
||||
|
@ -564,121 +465,7 @@ namespace Microsoft.Spark.CSharp
|
|||
|
||||
logger.LogDebug("Files available in executor");
|
||||
logger.LogDebug("Location: {0}{1}{2}", folder, Environment.NewLine, outfiles.ToString());
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the number of milliseconds between UnixTimeEpoch and the given time,
/// truncated to a long.
/// </summary>
/// <param name="dt">Time to convert.</param>
private static long ToUnixTime(DateTime dt)
{
    TimeSpan sinceEpoch = dt.Subtract(UnixTimeEpoch);
    return (long)sinceEpoch.TotalMilliseconds;
}
|
||||
|
||||
/// <summary>
/// Lazily reads length-prefixed messages from the stream until the
/// END_OF_DATA_SECTION marker is read, decoding each one according to the
/// serialized mode and yielding the result.
/// </summary>
/// <param name="inputStream">Stream the messages are read from.</param>
/// <param name="serializedMode">Name of a SerializedMode member; parsed with Enum.Parse.</param>
/// <param name="isFuncSqlUdf">
/// In Row mode, 0 means each unpickled item is converted through RowConstructor.GetRow();
/// any other value yields the unpickled items unchanged.
/// </param>
/// <returns>The decoded messages, in stream order.</returns>
private static IEnumerable<dynamic> GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf)
{
    logger.LogInfo("Serialized mode in GetIterator: " + serializedMode);
    IFormatter binaryFormatter = new BinaryFormatter();
    var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode);

    // receiveWatch runs only while reading from the stream; it is paused while
    // decoding and while the consumer processes a yielded item.
    Stopwatch receiveWatch = Stopwatch.StartNew();
    while (true)
    {
        int length = SerDe.ReadInt(inputStream);
        if (length == (int)SpecialLengths.END_OF_DATA_SECTION)
        {
            break;
        }

        receiveWatch.Stop();

        bool hasPayload = length > 0;
        if (!hasPayload && length != (int)SpecialLengths.NULL)
        {
            // Any other length carries no message: resume timing and read the next length.
            receiveWatch.Start();
            continue;
        }

        receiveWatch.Start();
        byte[] payload = hasPayload ? SerDe.ReadBytes(inputStream, length) : null;
        receiveWatch.Stop();

        switch (mode)
        {
            case SerializedMode.String:
                {
                    if (!hasPayload)
                    {
                        yield return null;
                        break;
                    }

                    if (payload == null)
                    {
                        logger.LogDebug("Buffer is null. Message length is {0}", length);
                    }
                    yield return SerDe.ToString(payload);
                    break;
                }

            case SerializedMode.Row:
                {
                    Debug.Assert(length > 0);
                    var unpickled = PythonSerDe.GetUnpickledObjects(payload);

                    foreach (var item in unpickled)
                    {
                        if (isFuncSqlUdf == 0)
                        {
                            yield return (item as RowConstructor).GetRow();
                        }
                        else
                        {
                            yield return item;
                        }
                    }

                    break;
                }

            case SerializedMode.Pair:
                {
                    // The first payload is the key; the value follows with its own length prefix.
                    byte[] key = payload;
                    byte[] value;

                    receiveWatch.Start();
                    int valueLength = SerDe.ReadInt(inputStream);
                    if (valueLength > 0)
                    {
                        value = SerDe.ReadBytes(inputStream, valueLength);
                    }
                    else if (valueLength == (int)SpecialLengths.NULL)
                    {
                        value = null;
                    }
                    else
                    {
                        throw new Exception(string.Format("unexpected valueLength: {0}", valueLength));
                    }
                    receiveWatch.Stop();

                    yield return new Tuple<byte[], byte[]>(key, value);
                    break;
                }

            case SerializedMode.None: //just return raw bytes
                {
                    yield return payload;
                    break;
                }

            default:
                {
                    yield return payload == null
                        ? null
                        : binaryFormatter.Deserialize(new MemoryStream(payload));
                    break;
                }
        }

        receiveWatch.Start();
    }

    logger.LogInfo("total receive time: {0}", receiveWatch.ElapsedMilliseconds);
}
|
||||
}
|
||||
|
||||
internal class SparkCLRAssemblyHandler
|
||||
{
|
||||
|
|
|
@ -46,6 +46,8 @@
|
|||
<Reference Include="Microsoft.CSharp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="WorkerFunc.cs" />
|
||||
<Compile Include="UDFCommand.cs" />
|
||||
<Compile Include="MultiThreadWorker.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<Compile Include="TaskRunner.cs" />
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
using System.Runtime.Serialization;
|
||||
using Microsoft.Spark.CSharp.Core;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Microsoft.Spark.CSharp
|
||||
{
|
||||
/// <summary>
/// Immutable holder pairing a worker function with its argument count and
/// argument offsets.
/// </summary>
internal class WorkerFunc
{
    private readonly CSharpWorkerFunc workerFunc;
    private readonly int argsCount;
    private readonly List<int> argOffsets;

    /// <summary>
    /// Stores the three values exactly as supplied; the offsets list is kept by
    /// reference, not copied.
    /// </summary>
    internal WorkerFunc(CSharpWorkerFunc func, int argsCount, List<int> argOffsets)
    {
        this.workerFunc = func;
        this.argsCount = argsCount;
        this.argOffsets = argOffsets;
    }

    /// <summary>The wrapped worker function (property name spelling kept as-is; it is part of the class's interface).</summary>
    internal CSharpWorkerFunc CharpWorkerFunc => workerFunc;

    /// <summary>The argument count supplied at construction.</summary>
    internal int ArgsCount => argsCount;

    /// <summary>The argument offsets list supplied at construction.</summary>
    internal List<int> ArgOffsets => argOffsets;
}
|
||||
}
|
|
@ -81,6 +81,7 @@ namespace WorkerTest
|
|||
worker.Start();
|
||||
int serverPort = 0;
|
||||
serverPort = SerDe.ReadInt(worker.StandardOutput.BaseStream);
|
||||
Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", serverPort.ToString());
|
||||
|
||||
StreamReader stdoutReader = worker.StandardOutput;
|
||||
Task.Run(() => {
|
||||
|
@ -119,7 +120,7 @@ namespace WorkerTest
|
|||
private ISocketWrapper CreateSocket(int serverPort)
|
||||
{
|
||||
var socket =SocketFactory.CreateSocket();
|
||||
socket.Connect(IPAddress.Loopback, serverPort);
|
||||
socket.Connect(IPAddress.Loopback, serverPort, null);
|
||||
return socket;
|
||||
}
|
||||
|
||||
|
@ -131,6 +132,10 @@ namespace WorkerTest
|
|||
{
|
||||
SerDe.Write(s, splitIndex);
|
||||
SerDe.Write(s, ver);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0L);
|
||||
SerDe.Write(s, sparkFilesDir);
|
||||
SerDe.Write(s, numberOfIncludesItems);
|
||||
SerDe.Write(s, numBroadcastVariables);
|
||||
|
|
|
@ -93,6 +93,7 @@ namespace WorkerTest
|
|||
}
|
||||
};
|
||||
|
||||
Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", port.ToString());
|
||||
lock (syncLock)
|
||||
{
|
||||
output.Clear();
|
||||
|
@ -125,6 +126,10 @@ namespace WorkerTest
|
|||
{
|
||||
SerDe.Write(s, splitIndex);
|
||||
SerDe.Write(s, ver);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0L);
|
||||
SerDe.Write(s, sparkFilesDir);
|
||||
SerDe.Write(s, numberOfIncludesItems);
|
||||
SerDe.Write(s, numBroadcastVariables);
|
||||
|
@ -631,6 +636,10 @@ namespace WorkerTest
|
|||
{
|
||||
SerDe.Write(s, splitIndex);
|
||||
SerDe.Write(s, ver);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0);
|
||||
SerDe.Write(s, 0L);
|
||||
SerDe.Write(s, sparkFilesDir);
|
||||
SerDe.Write(s, numberOfIncludesItems);
|
||||
|
||||
|
@ -802,6 +811,10 @@ namespace WorkerTest
|
|||
using (var inputStream = new MemoryStream(500))
|
||||
{
|
||||
SerDe.Write(inputStream, "1.0"); //version
|
||||
SerDe.Write(inputStream, 0);
|
||||
SerDe.Write(inputStream, 0);
|
||||
SerDe.Write(inputStream, 0);
|
||||
SerDe.Write(inputStream, 0L);
|
||||
SerDe.Write(inputStream, ""); //includes directory
|
||||
SerDe.Write(inputStream, 0); //number of included items
|
||||
SerDe.Write(inputStream, 0); //number of broadcast variables
|
||||
|
|
|
@ -35,9 +35,8 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
<Reference Include="Newtonsoft.Json">
|
||||
<HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Pyrolite, Version=4.10.0.26455, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
|
|
@ -32,17 +32,17 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
@ -84,4 +84,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -35,17 +35,17 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
@ -84,4 +84,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -1,6 +1,6 @@
|
|||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.25123.0
|
||||
VisualStudioVersion = 14.0.25420.1
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HdfsWordCount", "Streaming\HdfsWordCount\HdfsWordCount.csproj", "{6A2C7CF9-D64E-490D-9841-269EE14F7932}"
|
||||
EndProject
|
||||
|
|
|
@ -34,14 +34,17 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net">
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
@ -80,4 +83,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -33,17 +33,17 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="CSharpWorker">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
|
@ -75,4 +75,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -34,17 +34,17 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
@ -82,4 +82,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -34,17 +34,17 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
@ -82,4 +82,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -34,16 +34,18 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
@ -85,4 +87,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -32,22 +32,22 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="CSharpWorker">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Pyrolite">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
|
||||
|
@ -80,4 +80,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -32,15 +32,17 @@
|
|||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net, Version=1.2.10.0, Culture=neutral, PublicKeyToken=1b44e1d426115821, processorArchitecture=MSIL">
|
||||
<Reference Include="CSharpWorker, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<Reference Include="log4net, Version=1.2.15.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter, Version=2.0.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=4.5.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
|
@ -79,4 +81,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -66,13 +66,13 @@
|
|||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net">
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="mscorlib" />
|
||||
<Reference Include="FSharp.Core, Version=$(TargetFSharpCoreVersion), Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
|
||||
|
@ -98,4 +98,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -71,20 +71,17 @@
|
|||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe</HintPath>
|
||||
<Private>True</Private>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="FSharp.Core">
|
||||
<HintPath>..\..\packages\FSharp.Core.4.0.0.1\lib\net40\FSharp.Core.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="log4net">
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
<HintPath>..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="mscorlib" />
|
||||
<Reference Include="Newtonsoft.Json">
|
||||
|
@ -110,4 +107,4 @@
|
|||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
</Project>
|
|
@ -145,7 +145,7 @@ The following sample commands show how to run Mobius examples in local mode. Usi
|
|||
Computes the _approximate_ value of Pi using two approaches and displays the value.
|
||||
|
||||
### WordCount Example (Batch)
|
||||
* Run `sparkclr-submit.cmd --exe SparkClrWordCount.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug <InputFilePath>`
|
||||
* Run `sparkclr-submit.cmd --exe SparkClrPi.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug <InputFilePath>`
|
||||
|
||||
`InputFilePath` should be in one of the following formats:
|
||||
* `hdfs://path/to/inputfile`
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>com.microsoft.sparkclr</groupId>
|
||||
<artifactId>spark-clr_2.11</artifactId>
|
||||
<version>2.0.200-SNAPSHOT</version>
|
||||
<version>2.3.1-SNAPSHOT</version>
|
||||
<name>Mobius Project</name>
|
||||
<description>C# language binding and extensions to Apache Spark</description>
|
||||
<url>https://github.com/Microsoft/Mobius</url>
|
||||
|
@ -35,7 +35,7 @@
|
|||
<maven.compiler.target>1.5</maven.compiler.target>
|
||||
<encoding>UTF-8</encoding>
|
||||
<scala.version>2.11.8</scala.version>
|
||||
<spark.version>2.0.2</spark.version>
|
||||
<spark.version>2.3.1</spark.version>
|
||||
<scala.binary.version>2.11</scala.binary.version>
|
||||
</properties>
|
||||
|
||||
|
@ -106,14 +106,19 @@
|
|||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-hive_2.11</artifactId>
|
||||
<version>2.0.0</version>
|
||||
<version>${spark.version}</version>
|
||||
<!--the following is placeholder for building uber package. Please keep as-is-->
|
||||
<!--<scope>provided</scope>-->
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.databricks</groupId>
|
||||
<artifactId>spark-csv_2.10</artifactId>
|
||||
<version>1.4.0</version>
|
||||
<artifactId>spark-csv_2.11</artifactId>
|
||||
<version>1.5.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.databricks</groupId>
|
||||
<artifactId>spark-avro_2.11</artifactId>
|
||||
<version>4.0.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ import java.util.{List => JList, Map => JMap}
|
|||
|
||||
import org.apache.hadoop.io.compress.CompressionCodec
|
||||
import org.apache.spark.api.python._
|
||||
import org.apache.spark.api.python.PythonAccumulatorV2
|
||||
import org.apache.spark.broadcast.Broadcast
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark._
|
||||
|
@ -34,7 +35,7 @@ class CSharpRDD(
|
|||
cSharpWorkerExecutable: String,
|
||||
unUsedVersionIdentifier: String,
|
||||
broadcastVars: JList[Broadcast[PythonBroadcast]],
|
||||
accumulator: Accumulator[JList[Array[Byte]]])
|
||||
accumulator: PythonAccumulatorV2)
|
||||
extends PythonRDD (
|
||||
parent,
|
||||
SQLUtils.createCSharpFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable,
|
||||
|
@ -95,7 +96,7 @@ class CSharpRDD(
|
|||
logInfo("Env vars: " + envVars.asScala.mkString(", "))
|
||||
|
||||
val runner = new PythonRunner(
|
||||
Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuse_worker, false, Array(Array(0)))
|
||||
Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuseWorker)
|
||||
runner.compute(firstParent.iterator(split, context), split.index, context)
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ package org.apache.spark.sql.api.csharp
|
|||
import java.io.{ByteArrayOutputStream, DataOutputStream}
|
||||
|
||||
import org.apache.spark.{Accumulator, SparkContext}
|
||||
import org.apache.spark.api.python.PythonAccumulatorV2
|
||||
import org.apache.spark.api.csharp.SerDe
|
||||
import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
|
||||
import org.apache.spark.api.python.{PythonBroadcast, PythonFunction, SerDeUtil}
|
||||
|
@ -51,7 +52,7 @@ object SQLUtils {
|
|||
cSharpWorkerExecutable: String,
|
||||
unUsedVersionIdentifier: String,
|
||||
broadcastVars: JList[Broadcast[PythonBroadcast]],
|
||||
accumulator: Accumulator[JList[Array[Byte]]]) : PythonFunction = {
|
||||
accumulator: PythonAccumulatorV2) : PythonFunction = {
|
||||
PythonFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable,
|
||||
unUsedVersionIdentifier, broadcastVars, accumulator)
|
||||
}
|
||||
|
|
|
@ -127,17 +127,17 @@ object Utils extends Logging {
|
|||
timer.schedule(new TimerTask() {
|
||||
@Override
|
||||
def run() {
|
||||
Runtime.getRuntime.halt(status)
|
||||
if (status!=0) { Runtime.getRuntime.halt(status); }
|
||||
}
|
||||
}, maxDelayMillis)
|
||||
// try to exit nicely
|
||||
System.exit(status);
|
||||
if (status!=0) { System.exit(status); }
|
||||
} catch {
|
||||
// exit nastily if we have a problem
|
||||
case ex: Throwable => Runtime.getRuntime.halt(status)
|
||||
} finally {
|
||||
// should never get here
|
||||
Runtime.getRuntime.halt(status)
|
||||
if (status!=0) { Runtime.getRuntime.halt(status); }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -147,7 +147,7 @@ object Utils extends Logging {
|
|||
* @param status the exit status, zero for OK, non-zero for error
|
||||
*/
|
||||
def exit(status: Int): Unit = {
|
||||
exit(status, 1000)
|
||||
exit(status, 1000);
|
||||
}
|
||||
|
||||
private[spark] def listZipFileEntries(file: File): Array[String] = {
|
||||
|
|
|
@ -42,7 +42,7 @@ if not exist "%SPARK_JARS_DIR%" (
|
|||
|
||||
set SPARK_JARS_CLASSPATH=%SPARK_JARS_DIR%\*
|
||||
|
||||
if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar)
|
||||
if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar)
|
||||
echo [sparkclr-submit.cmd] SPARKCLR_JAR=%SPARKCLR_JAR%
|
||||
set SPARKCLR_CLASSPATH=%SPARKCLR_HOME%\lib\%SPARKCLR_JAR%
|
||||
REM SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode
|
||||
|
@ -105,4 +105,4 @@ goto :eof
|
|||
@echo Example 2:
|
||||
@echo sparkclr-submit.cmd [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe c:\sparkclrapp\driver.zip arg1 arg2 arg3
|
||||
@echo Example 3:
|
||||
@echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3
|
||||
@echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3
|
||||
|
|
|
@ -32,7 +32,7 @@ function usage() {
|
|||
echo "Example 2:"
|
||||
echo "sparkclr-submit.sh [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe sparkclrapp/driver.zip arg1 arg2 arg3"
|
||||
echo "Example 3:"
|
||||
echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.10-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3"
|
||||
echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3"
|
||||
}
|
||||
|
||||
[ "$SPARK_HOME" = "" ] && spark_home_error
|
||||
|
@ -57,7 +57,7 @@ fi
|
|||
|
||||
export SPARK_JARS_CLASSPATH="$SPARK_JARS_DIR/*"
|
||||
|
||||
export SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar
|
||||
export SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar
|
||||
export SPARKCLR_CLASSPATH="$SPARKCLR_HOME/lib/$SPARKCLR_JAR"
|
||||
# SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode
|
||||
[ ! "$SPARKCLR_DEBUGMODE_EXT_JARS" = "" ] && export SPARKCLR_CLASSPATH="$SPARKCLR_CLASSPATH:$SPARKCLR_DEBUGMODE_EXT_JARS"
|
||||
|
|
Загрузка…
Ссылка в новой задаче