From 9aa97b98c61d4ac0c54c1149125c378dd21fa182 Mon Sep 17 00:00:00 2001 From: sutyag <42341317+sutyag@users.noreply.github.com> Date: Wed, 21 Nov 2018 06:53:43 +0530 Subject: [PATCH] Dev/sutyag/upgrade mobius (#697) * basic * Add extractor and outputter * Add reducer not done * Add procedure * kill node, broadcast, upload executable error feed to cosmos, specify avro or parquet syntax * Add more functions to HDFS. Add submitter heartbeat Update doc * Redesign cosmos download, add replication setting for hdfs * Improve executable runner to deal with bad lines * MERGE MOBIUS * change dependency path * Add registration method to mobius * Major refactoring to add ISparkosmosModule to modularize everything Start supporting streaming Fixed a couple of Mobius bugs Added integration tests Reenabled unit tests Added DatedPath * Make sparkcontext settable, fix setjobgroup * Expose more interface from Mobius * Mobius change for Spark 2.3 * fix version conflict, remove unused files * Added support for multiple UDFs * Fixed non sql udf issue * 1. Upgrade mobius to spark 2.3.1 2. Fixed UDF bugs 3. Added support for multiple UDFs * 1. Added sample testcases 2. Updated reference for examples * Removed stashed files * Fixed review comments * Fixed review comments * Fixed failed unit test cases * Deleting all the things * Updated version in appveyor * Updated tartool download path * Fixed java process terminate issue * Revert access modifier to internal from public for JvmBridge --- README.md | 2 +- appveyor.yml | 2 +- build/Build.cmd | 2 + build/localmode/RunSamples.cmd | 4 +- build/localmode/downloadtools.ps1 | 2 +- build/localmode/run-samples.sh | 2 +- cpp/Riosock/Riosock.vcxproj | 6 +- .../Microsoft.Spark.CSharp/Adapter.csproj | 11 +- .../Core/IRDDCollector.cs | 3 +- .../Microsoft.Spark.CSharp/Core/RDD.cs | 24 +- .../Core/RDDCollector.cs | 22 +- .../Interop/Ipc/JvmBridge.cs | 2 +- .../Interop/Ipc/JvmObjectReference.cs | 9 +- .../Interop/SparkCLREnvironment.cs | 4 +- .../Microsoft.Spark.CSharp/Network/ByteBuf.cs | 2 +- .../Network/DefaultSocketWrapper.cs | 327 +++--- .../Network/ISocketWrapper.cs | 15 +- .../Network/RioSocketWrapper.cs | 2 +- .../Network/SaeaSocketWrapper.cs | 2 +- .../Network/SocketInfo.cs | 28 + .../Proxy/IDataFrameProxy.cs | 6 +- .../Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs | 3 +- .../Proxy/ISparkContextProxy.cs | 3 +- .../Proxy/Ipc/DataFrameIpcProxy.cs | 15 +- .../Proxy/Ipc/RDDIpcProxy.cs | 5 +- .../Proxy/Ipc/SparkContextIpcProxy.cs | 13 +- .../Proxy/Ipc/SparkSessionIpcProxy.cs | 4 +- .../Proxy/Ipc/SqlContextIpcProxy.cs | 2 +- .../Microsoft.Spark.CSharp/Sql/DataFrame.cs | 31 +- .../Sql/DataFrameReader.cs | 15 +- .../Sql/DataFrameWriter.cs | 13 +- .../Microsoft.Spark.CSharp/Sql/Dataset.cs | 3 +- .../Microsoft.Spark.CSharp/Sql/Functions.cs | 41 +- .../Adapter/Microsoft.Spark.CSharp/Sql/Row.cs | 132 +-- .../Sql/RowConstructor.cs | 4 +- .../Sql/SparkSession.cs | 38 +- .../Microsoft.Spark.CSharp/Sql/SqlContext.cs | 39 +- .../Microsoft.Spark.CSharp/Sql/Types.cs | 989 ++++++++++-------- .../Sql/UdfRegistration.cs | 16 +- .../Microsoft.Spark.CSharp/packages.config | 6 +- .../Microsoft.Spark.CSharp.Adapter.Doc.XML | 34 +- .../documentation/Mobius_API_Documentation.md | 6 +- csharp/AdapterTest/AccumulatorTest.cs | 2 +- csharp/AdapterTest/AdapterTest.csproj | 11 +- csharp/AdapterTest/DataFrameTest.cs | 21 +- csharp/AdapterTest/DatasetTest.cs | 4 +- .../AdapterTest/Mocks/MockDataFrameProxy.cs | 10 +- csharp/AdapterTest/Mocks/MockRDDCollector.cs | 3 +- csharp/AdapterTest/Mocks/MockRddProxy.cs | 3 +- 
csharp/AdapterTest/Mocks/MockRow.cs | 7 + .../Mocks/MockSparkContextProxy.cs | 8 +- csharp/AdapterTest/SocketWrapperTest.cs | 4 +- csharp/AdapterTest/TestWithMoqDemo.cs | 2 +- csharp/AdapterTest/packages.config | 5 +- csharp/Repl/Repl.csproj | 9 +- csharp/Repl/packages.config | 8 +- .../DataFrameSamples.cs | 67 ++ .../Samples/Microsoft.Spark.CSharp/Program.cs | 4 +- .../Microsoft.Spark.CSharp/Samples.csproj | 8 +- .../Microsoft.Spark.CSharp/packages.config | 3 +- csharp/Tests.Common/Tests.Common.csproj | 7 +- .../FileSystem/HdfsFileStatus.cs | 57 + .../FileSystem/HdfsFileSystemHelper.cs | 58 +- .../Utils/Microsoft.Spark.CSharp/Utils.csproj | 1 + .../MultiThreadWorker.cs | 5 +- .../Microsoft.Spark.CSharp/TaskRunner.cs | 204 ++-- .../Microsoft.Spark.CSharp/UDFCommand.cs | 391 +++++++ .../Worker/Microsoft.Spark.CSharp/Worker.cs | 449 +++----- .../Microsoft.Spark.CSharp/Worker.csproj | 2 + .../Microsoft.Spark.CSharp/WorkerFunc.cs | 25 + csharp/WorkerTest/MultiThreadWorkerTest.cs | 7 +- csharp/WorkerTest/WorkerTest.cs | 13 + csharp/WorkerTest/WorkerTest.csproj | 5 +- examples/Batch/WordCount/WordCount.csproj | 12 +- examples/Batch/pi/Pi.csproj | 12 +- examples/Examples.sln | 2 +- .../CassandraDataFrame.csproj | 17 +- .../Sql/HiveDataFrame/HiveDataFrame.csproj | 18 +- .../Sql/JdbcDataFrame/JdbcDataFrame.csproj | 12 +- examples/Sql/SparkXml/SparkXml.csproj | 12 +- examples/Streaming/EventHub/EventHub.csproj | 14 +- .../HdfsWordCount/HdfsWordCount.csproj | 26 +- examples/Streaming/Kafka/Kafka.csproj | 18 +- .../fsharp/JsonDataFrame/JsonDataFrame.fsproj | 8 +- .../fsharp/WordCount/WordCountFSharp.fsproj | 11 +- notes/running-mobius-app.md | 2 +- scala/pom.xml | 15 +- .../apache/spark/api/csharp/CSharpRDD.scala | 5 +- .../spark/sql/api/csharp/SQLUtils.scala | 3 +- .../org/apache/spark/util/csharp/Utils.scala | 8 +- scripts/sparkclr-submit.cmd | 4 +- scripts/sparkclr-submit.sh | 4 +- 92 files changed, 2117 insertions(+), 1378 deletions(-) create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Network/SocketInfo.cs create mode 100644 csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileStatus.cs create mode 100644 csharp/Worker/Microsoft.Spark.CSharp/UDFCommand.cs create mode 100644 csharp/Worker/Microsoft.Spark.CSharp/WorkerFunc.cs diff --git a/README.md b/README.md index b007ce3..f230369 100644 --- a/README.md +++ b/README.md @@ -157,4 +157,4 @@ Mobius is licensed under the MIT license. See [LICENSE](LICENSE) file for full l * tweet [@MobiusForSpark](http://twitter.com/MobiusForSpark) ## Code of Conduct -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 
\ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml index b7a50ce..4eb2774 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -version: 2.0.2-SNAPSHOT.{build} +version: 2.3.1-SNAPSHOT.{build} environment: securefile: diff --git a/build/Build.cmd b/build/Build.cmd index 05239ac..485aa3b 100644 --- a/build/Build.cmd +++ b/build/Build.cmd @@ -6,6 +6,8 @@ rem Copyright (c) Microsoft. All rights reserved. rem Licensed under the MIT license. See LICENSE file in the project root for full license information. rem +SET MAVEN_OPTS=-Dhttps.protocols=TLSv1,TLSv1.1,TLSv1.2 + if "%1" == "csharp" set buildCSharp=true SET CMDHOME=%~dp0 diff --git a/build/localmode/RunSamples.cmd b/build/localmode/RunSamples.cmd index b9690e3..57872f7 100644 --- a/build/localmode/RunSamples.cmd +++ b/build/localmode/RunSamples.cmd @@ -47,7 +47,7 @@ if "%precheck%" == "bad" (goto :EOF) @rem @rem setup Hadoop and Spark versions @rem -set SPARK_VERSION=2.0.2 +set SPARK_VERSION=2.3.1 set HADOOP_VERSION=2.6 set APACHE_DIST_SERVER=archive.apache.org @echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION%, APACHE_DIST_SERVER=%APACHE_DIST_SERVER% @@ -100,7 +100,7 @@ if "!USER_EXE!"=="" ( call sparkclr-submit.cmd --conf spark.sql.warehouse.dir=%TEMP_DIR% %* ) -@if ERRORLEVEL 1 GOTO :ErrorStop +@if ERRORLEVEL 2 GOTO :ErrorStop @GOTO :EOF diff --git a/build/localmode/downloadtools.ps1 b/build/localmode/downloadtools.ps1 index c42ab8a..512a23f 100644 --- a/build/localmode/downloadtools.ps1 +++ b/build/localmode/downloadtools.ps1 @@ -20,7 +20,7 @@ if ($stage.ToLower() -eq "run") $hadoopVersion = if ($envValue -eq $null) { "2.6" } else { $envValue } $envValue = [Environment]::GetEnvironmentVariable("SPARK_VERSION") - $sparkVersion = if ($envValue -eq $null) { "2.0.2" } else { $envValue } + $sparkVersion = if ($envValue -eq $null) { "2.3.1" } else { $envValue } Write-Output "[downloadtools] hadoopVersion=$hadoopVersion, sparkVersion=$sparkVersion, apacheDistServer=$apacheDistServer" } diff --git a/build/localmode/run-samples.sh b/build/localmode/run-samples.sh index 685507d..24d4f3d 100755 --- a/build/localmode/run-samples.sh +++ b/build/localmode/run-samples.sh @@ -16,7 +16,7 @@ do done # setup Hadoop and Spark versions -export SPARK_VERSION=2.0.2 +export SPARK_VERSION=2.3.1 export HADOOP_VERSION=2.6 export APACHE_DIST_SERVER=archive.apache.org echo "[run-samples.sh] SPARK_VERSION=$SPARK_VERSION, HADOOP_VERSION=$HADOOP_VERSION, APACHE_DIST_SERVER=$APACHE_DIST_SERVER" diff --git a/cpp/Riosock/Riosock.vcxproj b/cpp/Riosock/Riosock.vcxproj index d61d067..95b642d 100644 --- a/cpp/Riosock/Riosock.vcxproj +++ b/cpp/Riosock/Riosock.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -20,13 +20,13 @@ DynamicLibrary true - v120 + v140 Unicode DynamicLibrary false - v120 + v140 true Unicode diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj index d887daf..72341a3 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj @@ -35,16 +35,17 @@ prompt 4 ..\documentation\Microsoft.Spark.CSharp.Adapter.Doc.XML + true - - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + + ..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll - - ..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll + + ..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll ..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll @@ -98,6 +99,7 @@ + @@ 
-184,6 +186,7 @@ + diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs index b8b078c..51250de 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Core { @@ -11,6 +12,6 @@ namespace Microsoft.Spark.CSharp.Core /// interface IRDDCollector { - IEnumerable Collect(int port, SerializedMode serializedMode, Type type); + IEnumerable Collect(SocketInfo info, SerializedMode serializedMode, Type type); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs index bdfbd98..9dfd119 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.Spark.CSharp.Network; using Microsoft.Spark.CSharp.Proxy; using Microsoft.Spark.CSharp.Services; @@ -60,6 +61,7 @@ namespace Microsoft.Spark.CSharp.Core { return sparkContext; } + set { sparkContext = value; } } /// @@ -592,13 +594,13 @@ namespace Microsoft.Spark.CSharp.Core /// public T[] Collect() { - int port = RddProxy.CollectAndServe(); - return Collect(port).Cast().ToArray(); + var info = RddProxy.CollectAndServe(); + return Collect(info).Cast().ToArray(); } - internal IEnumerable Collect(int port) + internal IEnumerable Collect(SocketInfo info) { - return RddProxy.RDDCollector.Collect(port, serializedMode, typeof(T)); + return RddProxy.RDDCollector.Collect(info, serializedMode, typeof(T)); } /// @@ -830,9 +832,9 @@ namespace Microsoft.Spark.CSharp.Core var mappedRDD = MapPartitionsWithIndex(new TakeHelper(left).Execute); - int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions); + var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions); - IEnumerable res = Collect(port).Cast(); + IEnumerable res = Collect(info).Cast(); items.AddRange(res); partsScanned += numPartsToTry; @@ -925,7 +927,7 @@ namespace Microsoft.Spark.CSharp.Core /// public RDD Repartition(int numPartitions) { - return new RDD(RddProxy.Repartition(numPartitions), sparkContext); + return new RDD(RddProxy.Repartition(numPartitions), sparkContext, serializedMode); } /// @@ -942,8 +944,8 @@ namespace Microsoft.Spark.CSharp.Core /// public RDD Coalesce(int numPartitions, bool shuffle = false) { - return new RDD(RddProxy.Coalesce(numPartitions, shuffle), sparkContext); - } + return new RDD(RddProxy.Coalesce(numPartitions, shuffle), sparkContext, serializedMode); + } /// /// Zips this RDD with another one, returning key-value pairs with the @@ -1065,8 +1067,8 @@ namespace Microsoft.Spark.CSharp.Core foreach (int partition in Enumerable.Range(0, GetNumPartitions())) { var mappedRDD = MapPartitionsWithIndex((pid, iter) => iter); - int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1)); - foreach (T row in Collect(port)) + var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1)); + foreach (T row in Collect(info)) yield return row; } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs index 6d92ad2..0596395 100644 --- 
a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs @@ -11,6 +11,7 @@ using System.Runtime.Serialization.Formatters.Binary; using System.Text; using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Network; +using Microsoft.Spark.CSharp.Services; using Microsoft.Spark.CSharp.Sql; namespace Microsoft.Spark.CSharp.Core @@ -20,14 +21,31 @@ namespace Microsoft.Spark.CSharp.Core /// class RDDCollector : IRDDCollector { - public IEnumerable Collect(int port, SerializedMode serializedMode, Type type) + private static ILoggerService logger; + private static ILoggerService Logger + { + get + { + if (logger != null) return logger; + logger = LoggerServiceFactory.GetLogger(typeof(RDDCollector)); + return logger; + } + } + + public IEnumerable Collect(SocketInfo info, SerializedMode serializedMode, Type type) { IFormatter formatter = new BinaryFormatter(); var sock = SocketFactory.CreateSocket(); - sock.Connect(IPAddress.Loopback, port); + sock.Connect(IPAddress.Loopback, info.Port, null); using (var s = sock.GetStream()) { + if (info.Secret != null) + { + SerDe.Write(s, info.Secret); + var reply = SerDe.ReadString(s); + Logger.LogDebug("Connect back to JVM: " + reply); + } byte[] buffer; while ((buffer = SerDe.ReadBytes(s)) != null && buffer.Length > 0) { diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs index a3e6cd9..366ed96 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs @@ -36,7 +36,7 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc if (!sockets.TryDequeue(out socket)) { socket = SocketFactory.CreateSocket(); - socket.Connect(IPAddress.Loopback, portNumber); + socket.Connect(IPAddress.Loopback, portNumber, null); } return socket; } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs index 75c27e2..12cdd93 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs @@ -12,12 +12,12 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc /// Reference to object created in JVM /// [Serializable] - internal class JvmObjectReference + public class JvmObjectReference { public string Id { get; private set; } private DateTime creationTime; - public JvmObjectReference(string jvmReferenceId) + internal JvmObjectReference(string jvmReferenceId) { Id = jvmReferenceId; creationTime = DateTime.UtcNow; @@ -48,6 +48,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc return base.GetHashCode(); } + public string ObjectToString() + { + return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "toString").ToString(); + } + public string GetDebugInfo() { var javaObjectReferenceForClassObject = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "getClass").ToString()); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs index bee4625..befa7ee 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs @@ -31,7 +31,9 @@ namespace Microsoft.Spark.CSharp.Interop } } - internal static IConfigurationService 
configurationService; + internal static IJvmBridge JvmBridge => SparkCLRIpcProxy.JvmBridge; + + internal static IConfigurationService configurationService; internal static IConfigurationService ConfigurationService { diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs index 90a1179..57886d5 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs @@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network /// ByteBuf delimits a section of a ByteBufChunk. /// It is the smallest unit to be allocated. /// - internal class ByteBuf + public class ByteBuf { private int readerIndex; private int writerIndex; diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs index 3db32f5..8c96fcc 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs @@ -2,182 +2,203 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System; +using System.Collections.Generic; using System.IO; using System.Net; using System.Net.Sockets; +using System.Text; +using System.Threading; using Microsoft.Spark.CSharp.Configuration; using Microsoft.Spark.CSharp.Services; namespace Microsoft.Spark.CSharp.Network { - /// - /// A simple wrapper of System.Net.Sockets.Socket class. - /// - internal class DefaultSocketWrapper : ISocketWrapper - { - private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DefaultSocketWrapper)); - private readonly Socket innerSocket; + /// + /// A simple wrapper of System.Net.Sockets.Socket class. + /// + internal class DefaultSocketWrapper : ISocketWrapper + { + private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DefaultSocketWrapper)); + private readonly Socket innerSocket; - /// - /// Default constructor that creates a new instance of DefaultSocket class which represents - /// a traditional socket (System.Net.Socket.Socket). - /// - /// This socket is bound to Loopback with port 0. - /// - public DefaultSocketWrapper() - { - innerSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); - var localEndPoint = new IPEndPoint(IPAddress.Loopback, 0); - innerSocket.Bind(localEndPoint); - } + /// + /// Default constructor that creates a new instance of DefaultSocket class which represents + /// a traditional socket (System.Net.Socket.Socket). + /// + /// This socket is bound to Loopback with port 0. + /// + public DefaultSocketWrapper() + { + innerSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + var localEndPoint = new IPEndPoint(IPAddress.Loopback, 0); + innerSocket.Bind(localEndPoint); + } - /// - /// Initializes a instance of DefaultSocket class using the specified System.Net.Socket.Socket object. - /// - /// The existing socket - private DefaultSocketWrapper(Socket socket) - { - innerSocket = socket; - } + /// + /// Initializes a instance of DefaultSocket class using the specified System.Net.Socket.Socket object. + /// + /// The existing socket + private DefaultSocketWrapper(Socket socket) + { + innerSocket = socket; + } - /// - /// Accepts a incoming connection request. 
- /// - /// A DefaultSocket instance used to send and receive data - public ISocketWrapper Accept() - { - var socket = innerSocket.Accept(); - return new DefaultSocketWrapper(socket); - } + /// + /// Accepts a incoming connection request. + /// + /// A DefaultSocket instance used to send and receive data + public ISocketWrapper Accept() + { + var socket = innerSocket.Accept(); + return new DefaultSocketWrapper(socket); + } - /// - /// Close the socket connections and releases all associated resources. - /// - public void Close() - { - innerSocket.Close(); - } + /// + /// Close the socket connections and releases all associated resources. + /// + public void Close() + { + innerSocket.Close(); + } - /// - /// Establishes a connection to a remote host that is specified by an IP address and a port number - /// - /// The IP address of the remote host - /// The port number of the remote host - public void Connect(IPAddress remoteaddr, int port) - { - var remoteEndPoint = new IPEndPoint(remoteaddr, port); - innerSocket.Connect(remoteEndPoint); - } + /// + /// Establishes a connection to a remote host that is specified by an IP address and a port number + /// + /// The IP address of the remote host + /// The port number of the remote host + public void Connect(IPAddress remoteaddr, int port, string secret) + { + var remoteEndPoint = new IPEndPoint(remoteaddr, port); + innerSocket.Connect(remoteEndPoint); + } - /// - /// Returns the NetworkStream used to send and receive data. - /// - /// The underlying Stream instance that be used to send and receive data - /// - /// GetStream returns a NetworkStream that you can use to send and receive data. You must close/dispose - /// the NetworkStream by yourself. Closing DefaultSocketWrapper does not release the NetworkStream - /// - public Stream GetStream() - { - return new NetworkStream(innerSocket); - } + private static byte[] ReceiveAll(Socket socket, int len) + { + var buffer = new List(); - /// - /// Returns a stream used to receive data only. - /// - /// The underlying Stream instance that be used to receive data - public Stream GetInputStream() - { - // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. - var readBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerReadBufferSizeEnvName) ?? "65536"); - logger.LogDebug("Input stream buffer size: [{0}]", readBufferSize); - return readBufferSize > 0 ? new BufferedStream(GetStream(), readBufferSize) : GetStream(); - } + while (socket.Available > 0 && buffer.Count < len) + { + var currByte = new Byte[1]; + var byteCounter = socket.Receive(currByte, currByte.Length, SocketFlags.None); - /// - /// Returns a stream used to send data only. - /// - /// The underlying Stream instance that be used to send data - public Stream GetOutputStream() - { - // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. - var writeBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerWriteBufferSizeEnvName) ?? "65536"); - logger.LogDebug("Output stream buffer size: [{0}]", writeBufferSize); - return writeBufferSize > 0 ? new BufferedStream(GetStream(), writeBufferSize) : GetStream(); - } + if (byteCounter.Equals(1)) + { + buffer.Add(currByte[0]); + } + } - /// - /// Starts listening for incoming connections requests - /// - /// The maximum length of the pending connections queue. 
- public void Listen(int backlog = 16) - { - innerSocket.Listen(backlog); - } + return buffer.ToArray(); + } - /// - /// Receives network data from this socket, and returns a ByteBuf that contains the received data. - /// - /// The DefaultSocketWrapper does not support this function. - /// - /// A ByteBuf object that contains received data. - public ByteBuf Receive() - { - throw new NotImplementedException(); - } + /// + /// Returns the NetworkStream used to send and receive data. + /// + /// The underlying Stream instance that be used to send and receive data + /// + /// GetStream returns a NetworkStream that you can use to send and receive data. You must close/dispose + /// the NetworkStream by yourself. Closing DefaultSocketWrapper does not release the NetworkStream + /// + public Stream GetStream() + { + return new NetworkStream(innerSocket); + } - /// - /// Sends data to this socket with a ByteBuf object that contains data to be sent. - /// - /// The DefaultSocketWrapper does not support this function. - /// - /// A ByteBuf object that contains data to be sent - public void Send(ByteBuf data) - { - throw new NotImplementedException(); - } + /// + /// Returns a stream used to receive data only. + /// + /// The underlying Stream instance that be used to receive data + public Stream GetInputStream() + { + // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. + var readBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerReadBufferSizeEnvName) ?? "65536"); + logger.LogDebug("Input stream buffer size: [{0}]", readBufferSize); + return readBufferSize > 0 ? new BufferedStream(GetStream(), readBufferSize) : GetStream(); + } - /// - /// Disposes the resources used by this instance of the DefaultSocket class. - /// - /// - protected virtual void Dispose(bool disposing) - { - if (disposing) - { - innerSocket.Dispose(); - } - } + /// + /// Returns a stream used to send data only. + /// + /// The underlying Stream instance that be used to send data + public Stream GetOutputStream() + { + // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. + var writeBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerWriteBufferSizeEnvName) ?? "65536"); + logger.LogDebug("Output stream buffer size: [{0}]", writeBufferSize); + return writeBufferSize > 0 ? new BufferedStream(GetStream(), writeBufferSize) : GetStream(); + } - /// - /// Releases all resources used by the current instance of the DefaultSocket class. - /// - public void Dispose() - { - Dispose(true); - } + /// + /// Starts listening for incoming connections requests + /// + /// The maximum length of the pending connections queue. + public void Listen(int backlog = 16) + { + innerSocket.Listen(backlog); + } - /// - /// Frees resources used by DefaultSocket class - /// - ~DefaultSocketWrapper() - { - Dispose(false); - } + /// + /// Receives network data from this socket, and returns a ByteBuf that contains the received data. + /// + /// The DefaultSocketWrapper does not support this function. + /// + /// A ByteBuf object that contains received data. + public ByteBuf Receive() + { + throw new NotImplementedException(); + } - /// - /// Indicates whether there are data that has been received from the network and is available to be read. - /// - public bool HasData { get { return innerSocket.Available > 0; } } + /// + /// Sends data to this socket with a ByteBuf object that contains data to be sent. 
+ /// + /// The DefaultSocketWrapper does not support this function. + /// + /// A ByteBuf object that contains data to be sent + public void Send(ByteBuf data) + { + throw new NotImplementedException(); + } - /// - /// Returns the local endpoint. - /// - public EndPoint LocalEndPoint { get { return innerSocket.LocalEndPoint; } } + /// + /// Disposes the resources used by this instance of the DefaultSocket class. + /// + /// + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + innerSocket.Dispose(); + } + } - /// - /// Returns the remote endpoint if it has one. - /// - public EndPoint RemoteEndPoint { get { return innerSocket.RemoteEndPoint; } } - } + /// + /// Releases all resources used by the current instance of the DefaultSocket class. + /// + public void Dispose() + { + Dispose(true); + } + + /// + /// Frees resources used by DefaultSocket class + /// + ~DefaultSocketWrapper() + { + Dispose(false); + } + + /// + /// Indicates whether there are data that has been received from the network and is available to be read. + /// + public bool HasData { get { return innerSocket.Available > 0; } } + + /// + /// Returns the local endpoint. + /// + public EndPoint LocalEndPoint { get { return innerSocket.LocalEndPoint; } } + + /// + /// Returns the remote endpoint if it has one. + /// + public EndPoint RemoteEndPoint { get { return innerSocket.RemoteEndPoint; } } + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs index b08dcd6..45b61d2 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs @@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network /// ISocketWrapper interface defines the common methods to operate a socket (traditional socket or /// Windows Registered IO socket) /// - internal interface ISocketWrapper : IDisposable + public interface ISocketWrapper : IDisposable { /// /// Accepts a incoming connection request. @@ -24,12 +24,13 @@ namespace Microsoft.Spark.CSharp.Network /// void Close(); - /// - /// Establishes a connection to a remote host that is specified by an IP address and a port number - /// - /// The IP address of the remote host - /// The port number of the remote host - void Connect(IPAddress remoteaddr, int port); + /// + /// Establishes a connection to a remote host that is specified by an IP address and a port number + /// + /// The IP address of the remote host + /// The port number of the remote host + /// The secret to connect, can be null + void Connect(IPAddress remoteaddr, int port, string secret); /// /// Returns a stream used to send and receive data. 
diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs index 740787f..54e73ed 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs @@ -151,7 +151,7 @@ namespace Microsoft.Spark.CSharp.Network /// /// The IP address of the remote host /// The port number of the remote host - public void Connect(IPAddress remoteaddr, int port) + public void Connect(IPAddress remoteaddr, int port, string secret) { EnsureAccessible(); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs index cb8ed0f..505bf96 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs @@ -111,7 +111,7 @@ namespace Microsoft.Spark.CSharp.Network /// /// The IP address of the remote host /// The port number of the remote host - public void Connect(IPAddress remoteaddr, int port) + public void Connect(IPAddress remoteaddr, int port, string secret) { var remoteEndPoint = new IPEndPoint(remoteaddr, port); innerSocket.Connect(remoteEndPoint); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/SocketInfo.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SocketInfo.cs new file mode 100644 index 0000000..d14e5cc --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SocketInfo.cs @@ -0,0 +1,28 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Interop.Ipc; + +namespace Microsoft.Spark.CSharp.Network +{ + public class SocketInfo + { + public readonly int Port; + public readonly string Secret; + + public SocketInfo(int port, string secret) + { + Port = port; + Secret = secret; + } + + public static SocketInfo Parse(object o) + { + var oo = o as List; + if (oo == null) throw new Exception(o.ToString() + " is not socket info "+typeof(List)+" "+o.GetType()); + return new SocketInfo(int.Parse(oo[0].ObjectToString()), oo[1].ObjectToString()); + } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs index 9928523..87071d9 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs @@ -13,7 +13,7 @@ namespace Microsoft.Spark.CSharp.Proxy IRDDProxy JavaToCSharp(); string GetQueryExecution(); string GetExecutedPlan(); - string GetShowString(int numberOfRows, bool truncate); + string GetShowString(int numberOfRows, int truncate, bool vertical); bool IsLocal(); IStructTypeProxy GetSchema(); IRDDProxy ToJSON(); @@ -59,7 +59,9 @@ namespace Microsoft.Spark.CSharp.Proxy IDataFrameProxy Repartition(int numPartitions, IColumnProxy[] columns); IDataFrameProxy Repartition(IColumnProxy[] columns); IDataFrameProxy Sample(bool withReplacement, double fraction, long seed); - IDataFrameWriterProxy Write(); + IDataFrameProxy Broadcast(); + + IDataFrameWriterProxy Write(); } internal interface IUDFProxy diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs index e323cf4..24788c0 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs @@ -7,6 
+7,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Proxy { @@ -41,6 +42,6 @@ namespace Microsoft.Spark.CSharp.Proxy void SaveAsSequenceFile(string path, string compressionCodecClass); void SaveAsTextFile(string path, string compressionCodecClass); long Count(); - int CollectAndServe(); + SocketInfo CollectAndServe(); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs index f1a00ac..a53fdab 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs @@ -8,6 +8,7 @@ using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Proxy @@ -50,7 +51,7 @@ namespace Microsoft.Spark.CSharp.Proxy void CancelJobGroup(string groupId); void CancelAllJobs(); IStatusTrackerProxy StatusTracker { get; } - int RunJob(IRDDProxy rdd, IEnumerable partitions); + SocketInfo RunJob(IRDDProxy rdd, IEnumerable partitions); IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId); IRDDProxy CreateCSharpRdd(IRDDProxy prefvJavaRddReference, byte[] command, Dictionary environmentVariables, List pythonIncludes, bool preservePartitioning, List broadcastVariables, List accumulator); IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions, long partitionFuncId); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs index 177d33c..85c1210 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs @@ -79,12 +79,12 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(executedPlanReference, "toString", new object[] { }).ToString(); } - public string GetShowString(int numberOfRows, bool truncate) + public string GetShowString(int numberOfRows, int truncate, bool vertical) { return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( jvmDataFrameReference, "showString", - new object[] { numberOfRows, truncate }).ToString(); + new object[] { numberOfRows, truncate, vertical}).ToString(); } public bool IsLocal() @@ -575,7 +575,16 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc new object[] { withReplacement, fraction, seed }).ToString()), sqlContextProxy); } - public IDataFrameWriterProxy Write() + public IDataFrameProxy Broadcast() + { + return + new DataFrameIpcProxy( + new JvmObjectReference( + SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.functions", "broadcast", + new object[] { jvmDataFrameReference }).ToString()), sqlContextProxy); + } + + public IDataFrameWriterProxy Write() { return new DataFrameWriterIpcProxy(new JvmObjectReference( SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDataFrameReference, "write").ToString())); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs index 9377c07..3ef6577 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs @@ -12,6 +12,7 @@ using 
System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop; using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Proxy.Ipc { @@ -66,10 +67,10 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc return long.Parse(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(rdd, "count").ToString()); } - public int CollectAndServe() + public SocketInfo CollectAndServe() { var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "rdd")); - return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd }).ToString()); + return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd })); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs index 01290fd..f48aa52 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs @@ -11,6 +11,7 @@ using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop; using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Network; using Microsoft.Spark.CSharp.Proxy.Ipc; namespace Microsoft.Spark.CSharp.Proxy.Ipc @@ -134,10 +135,8 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc public void Accumulator(int port) { - jvmAccumulatorReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "accumulator", - SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList"), - SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorParam", IPAddress.Loopback.ToString(), port) - )); + jvmAccumulatorReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorV2", IPAddress.Loopback.ToString(), port); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkContextReference, "register", new object[] { jvmAccumulatorReference }); } public void Stop() @@ -241,7 +240,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc public void SetJobGroup(string groupId, string description, bool interruptOnCancel) { - SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setCheckpointDir", new object[] { groupId, description, interruptOnCancel }); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setJobGroup", new object[] { groupId, description, interruptOnCancel }); } public void SetLocalProperty(string key, string value) @@ -344,10 +343,10 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc } - public int RunJob(IRDDProxy rdd, IEnumerable partitions) + public SocketInfo RunJob(IRDDProxy rdd, IEnumerable partitions) { var jpartitions = JvmBridgeUtils.GetJavaList(partitions); - return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions }).ToString()); + return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions })); } public IBroadcastProxy 
ReadBroadcastFromFile(string path, out long broadcastId) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs index febfd3b..bc6e5a1 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs @@ -27,7 +27,9 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc } } - public ISqlContextProxy SqlContextProxy + internal JvmObjectReference JvmReference => jvmSparkSessionReference; + + public ISqlContextProxy SqlContextProxy { get { return sqlContextProxy; } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs index 4bb930f..d6f0098 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs @@ -106,7 +106,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc var udf = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.execution.python.UserDefinedPythonFunction", new object[] { - name, function, dt + name, function, dt, 100 /*BatchUDF*/, true /*deterministic*/ }); SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(judf, "registerPython", new object[] { name, udf }); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs index 66601ca..b288baa 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs @@ -6,7 +6,9 @@ using System.Collections.Generic; using System.Globalization; using System.Linq; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Proxy.Ipc; using Microsoft.Spark.CSharp.Services; namespace Microsoft.Spark.CSharp.Sql @@ -66,10 +68,12 @@ namespace Microsoft.Spark.CSharp.Sql } } - /// - /// Returns true if the collect and take methods can be run locally (without any Spark executors). - /// - public bool IsLocal + internal JvmObjectReference JvmReference => (dataFrameProxy as DataFrameIpcProxy)?.JvmDataFrameReference; + + /// + /// Returns true if the collect and take methods can be run locally (without any Spark executors). + /// + public bool IsLocal { get { @@ -145,10 +149,11 @@ namespace Microsoft.Spark.CSharp.Sql /// /// Number of rows to display - default 20 /// Indicates if strings more than 20 characters long will be truncated - public void Show(int numberOfRows = 20, bool truncate = true) + /// If set to True, print output rows vertically (one line per column value). 
+ public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false) { logger.LogInfo("Writing {0} rows in the DataFrame to Console output", numberOfRows); - Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate)); + Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate, vertical)); } /// @@ -166,8 +171,8 @@ namespace Microsoft.Spark.CSharp.Sql /// public IEnumerable Collect() { - int port = RddProxy.CollectAndServe(); - return Rdd.Collect(port).Cast(); + var info = RddProxy.CollectAndServe(); + return Rdd.Collect(info).Cast(); } //TODO - add this method if needed to convert Row to collection of T @@ -917,10 +922,11 @@ namespace Microsoft.Spark.CSharp.Sql /// /// Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`) /// + /// Persist storage type // Python API: https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py persist(self, storageLevel) - public DataFrame Persist() + public DataFrame Persist(StorageLevelType type= StorageLevelType.MEMORY_AND_DISK) { - dataFrameProxy.Persist(StorageLevelType.MEMORY_AND_DISK); + dataFrameProxy.Persist(type); return this; } @@ -944,6 +950,11 @@ namespace Microsoft.Spark.CSharp.Sql return Persist(); } + public DataFrame Broadcast() + { + return new DataFrame(dataFrameProxy.Broadcast(), sparkContext); + } + /// /// Returns a new DataFrame that has exactly `numPartitions` partitions. /// diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs index 04fcc90..c27700e 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs @@ -159,5 +159,18 @@ namespace Microsoft.Spark.CSharp.Sql logger.LogInfo("Constructing DataFrame using Parquet source {0}", string.Join(";", path)); return new DataFrame(dataFrameReaderProxy.Parquet(path), sparkContext); } - } + + /// + /// Loads a AVRO file (one object per line) and returns the result as a DataFrame. + /// + /// This function goes through the input once to determine the input schema. If you know the + /// schema in advance, use the version that specifies the schema to avoid the extra scan. + /// + /// input path + public DataFrame Avro(string path) + { + logger.LogInfo("Constructing DataFrame using AVRO source {0}", path); + return Format("com.databricks.spark.avro").Load(path); + } + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs index a16478d..9fa9fdb 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs @@ -170,5 +170,16 @@ namespace Microsoft.Spark.CSharp.Sql { Format("parquet").Save(path); } - } + + /// + /// Saves the content of the DataFrame in AVRO format at the specified path. 
+ /// This is equivalent to: + /// Format("com.databricks.spark.avro").Save(path) + /// + public void Avro(string path) + { + Format("com.databricks.spark.avro").Save(path); + } + + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs index b3a81cf..bc89168 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs @@ -92,7 +92,8 @@ namespace Microsoft.Spark.CSharp.Sql /// /// Number of rows - default is 20 /// Indicates if rows with more than 20 characters to be truncated - public void Show(int numberOfRows = 20, bool truncate = true) + /// If set to true, prints output rows vertically (one line per column value). + public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false) { ToDF().Show(numberOfRows, truncate); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs index c9166fe..a23d91a 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs @@ -4,6 +4,8 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; +using System.Runtime.Serialization; using System.Text; using System.Threading.Tasks; @@ -1119,5 +1121,42 @@ namespace Microsoft.Spark.CSharp.Sql return input.Select(a => func((A1)(a[0]), (A2)(a[1]), (A3)(a[2]), (A4)(a[3]), (A5)(a[4]), (A6)(a[5]), (A7)(a[6]), (A8)(a[7]), (A9)(a[8]), (A10)(a[9]))).Cast(); } } - #endregion + + [Serializable] + internal class UdfReflectionHelper + { + private readonly MethodInfo func; + + [NonSerialized] + private object[] _cache; + + internal UdfReflectionHelper(MethodInfo f) + { + func = f; + _cache = new object[func.GetParameters().Length]; + } + + public Type ReturnType => func.ReturnType; + + [OnDeserialized()] + public void Init(StreamingContext context) + { + _cache = new object[func.GetParameters().Length]; + } + + internal IEnumerable Execute(int pid, IEnumerable input) + { + return input.Select(Run).Cast(); + } + + private dynamic Run(dynamic input) + { + for (int i = 0; i < _cache.Length; ++i) + { + _cache[i] = input[i]; + } + return func.Invoke(null, _cache); + } + } + #endregion } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs index 77614a7..a299d1a 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs @@ -18,17 +18,24 @@ namespace Microsoft.Spark.CSharp.Sql [NonSerialized] private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(Row)); - /// - /// Number of elements in the Row. - /// - /// elements count in this row - public abstract int Size(); + public abstract dynamic[] Values { get; } + + /// + /// Number of elements in the Row. + /// + /// elements count in this row + public abstract int Size(); /// /// Schema for the row. /// public abstract StructType GetSchema(); + public virtual void ResetValues(dynamic[] values) + { + throw new NotImplementedException(); + } + /// /// Returns the value at position i. 
/// @@ -80,8 +87,22 @@ namespace Microsoft.Spark.CSharp.Sql internal class RowImpl : Row { private readonly StructType schema; - public dynamic[] Values { get { return values; } } - private readonly dynamic[] values; + + public override dynamic[] Values + { + get + { + if (!valuesConverted) + { + schema.ConvertPickleObjects(rawValues,rawValues); + valuesConverted = true; + } + return rawValues; + } + } + + private dynamic[] rawValues; + private bool valuesConverted = false; private readonly int columnCount; @@ -96,11 +117,11 @@ namespace Microsoft.Spark.CSharp.Sql { if (data is dynamic[]) { - values = data as dynamic[]; + rawValues = data as dynamic[]; } else if (data is List) { - values = (data as List).ToArray(); + rawValues = (data as List).ToArray(); } else { @@ -109,17 +130,25 @@ namespace Microsoft.Spark.CSharp.Sql this.schema = schema; - columnCount = values.Count(); - int schemaColumnCount = this.schema.Fields.Count(); + columnCount = rawValues.Length; + int schemaColumnCount = this.schema.Fields.Count; if (columnCount != schemaColumnCount) { throw new Exception(string.Format("column count inferred from data ({0}) and schema ({1}) mismatch", columnCount, schemaColumnCount)); } - - Initialize(); } - public override int Size() + public override void ResetValues(dynamic[] values) + { + if (columnCount != values.Length) + { + throw new ArgumentException("column count inferred from data and schema mismatch"); + } + rawValues = values; + valuesConverted = false; + } + + public override int Size() { return columnCount; } @@ -131,16 +160,15 @@ namespace Microsoft.Spark.CSharp.Sql public override dynamic Get(int i) { + if (i >= 0 && i < columnCount) return Values[i]; if (i >= columnCount) { throw new Exception(string.Format("i ({0}) >= columnCount ({1})", i, columnCount)); } - else if(i < 0) + else { throw new Exception(string.Format("i ({0}) < 0", i)); } - - return values[i]; } public override dynamic Get(string columnName) @@ -152,7 +180,7 @@ namespace Microsoft.Spark.CSharp.Sql public override string ToString() { List cols = new List(); - foreach (var item in values) + foreach (var item in Values) { if (item != null) { @@ -166,73 +194,7 @@ namespace Microsoft.Spark.CSharp.Sql return string.Format("[{0}]", string.Join(",", cols.ToArray())); } - - - private void Initialize() - { - - int index = 0; - foreach (var field in schema.Fields) - { - if (field.DataType is ArrayType) - { - Func convertArrayTypeToStructTypeFunc = (dataType, length) => - { - StructField[] fields = new StructField[length]; - for(int i = 0; i < length ; i++) - { - fields[i] = new StructField(string.Format("_array_{0}", i), dataType); - } - return new StructType(fields); - }; - var elementType = (field.DataType as ArrayType).ElementType; - - // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)), - // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList; - // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[]. - object[] valueOfArray = values[index] is ArrayList ? 
(values[index] as ArrayList).ToArray() : values[index] as object[]; - if (valueOfArray == null) - { - throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name); - } - - values[index] = new RowImpl(valueOfArray, elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).values; - } - else if (field.DataType is MapType) - { - //TODO - throw new NotImplementedException(); - } - else if (field.DataType is StructType) - { - dynamic value = values[index]; - if (value != null) - { - var subRow = new RowImpl(values[index], field.DataType as StructType); - values[index] = subRow; - } - } - else if (field.DataType is DecimalType) - { - //TODO - throw new NotImplementedException(); - } - else if (field.DataType is DateType) - { - //TODO - throw new NotImplementedException(); - } - else if (field.DataType is StringType) - { - if (values[index] != null) values[index] = values[index].ToString(); - } - else - { - values[index] = values[index]; - } - index++; - } - } + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs index 96b50c2..25726ad 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs @@ -78,7 +78,7 @@ namespace Microsoft.Spark.CSharp.Sql currentSchema = null; return row; } - + //removes objects of type RowConstructor and replacing them with actual values private object[] GetValues(object[] arguments) { @@ -86,7 +86,7 @@ namespace Microsoft.Spark.CSharp.Sql int i = 0; foreach (var argument in arguments) { - if (argument != null && argument.GetType() == typeof(RowConstructor)) + if (argument is RowConstructor) { values[i++] = (argument as RowConstructor).Values; } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs index c4f7288..aa70216 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs @@ -9,7 +9,9 @@ using System.Runtime.Remoting.Contexts; using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Proxy.Ipc; using Microsoft.Spark.CSharp.Services; using Microsoft.Spark.CSharp.Sql.Catalog; @@ -42,10 +44,12 @@ namespace Microsoft.Spark.CSharp.Sql get { return catalog ?? (catalog = new Catalog.Catalog(SparkSessionProxy.GetCatalog())); } } - /// - /// Interface through which the user may access the underlying SparkContext. - /// - public SparkContext SparkContext { get; private set; } + internal JvmObjectReference JvmReference => (sparkSessionProxy as SparkSessionIpcProxy)?.JvmReference; + + /// + /// Interface through which the user may access the underlying SparkContext. + /// + public SparkContext SparkContext { get; private set; } public UdfRegistration Udf { @@ -114,18 +118,30 @@ namespace Microsoft.Spark.CSharp.Sql // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]]. // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside. // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]]. 
- var rddRow = rdd.Map(r => r); + var rddRow = rdd.MapPartitions(r => r.Select(rr => rr)); rddRow.serializedMode = SerializedMode.Row; return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext); } - /// - /// Returns the specified table as a - /// - /// - /// - public DataFrame Table(string tableName) + public DataFrame CreateDataFrame(RDD rdd, StructType schema) + { + // Note: This is for pickling RDD, convert to RDD which happens in CSharpWorker. + // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]]. + // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside. + // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]]. + var rddRow = rdd.MapPartitions(rows => rows.Select(r => r.Values)); + rddRow.serializedMode = SerializedMode.Row; + + return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext); + } + + /// + /// Returns the specified table as a + /// + /// + /// + public DataFrame Table(string tableName) { return new DataFrame(sparkSessionProxy.Table(tableName), SparkContext); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs index 03e9fb2..c99e901 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Reflection; using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Proxy; using Microsoft.Spark.CSharp.Services; @@ -150,13 +151,25 @@ namespace Microsoft.Spark.CSharp.Sql return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext); } - /// - /// Registers the given as a temporary table in the catalog. - /// Temporary tables exist only during the lifetime of this instance of SqlContext. - /// - /// - /// - public void RegisterDataFrameAsTable(DataFrame dataFrame, string tableName) + public DataFrame CreateDataFrame(RDD rdd, StructType schema) + { + // Note: This is for pickling RDD, convert to RDD which happens in CSharpWorker. + // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]]. + // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside. + // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]]. + var rddRow = rdd.Map(r => r); + rddRow.serializedMode = SerializedMode.Row; + + return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext); + } + + /// + /// Registers the given as a temporary table in the catalog. + /// Temporary tables exist only during the lifetime of this instance of SqlContext. 
+ /// + /// + /// + public void RegisterDataFrameAsTable(DataFrame dataFrame, string tableName) { sqlContextProxy.RegisterDataFrameAsTable(dataFrame.DataFrameProxy, tableName); } @@ -527,6 +540,14 @@ namespace Microsoft.Spark.CSharp.Sql Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); } - #endregion - } + + public void RegisterFunction(string name, MethodInfo f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.DeclaringType?.FullName + "." + f.Name); + var helper = new UdfReflectionHelper(f); + Func, IEnumerable> udfHelper = helper.Execute; + sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType)); + } + #endregion + } } \ No newline at end of file diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs index 2efcf20..ef945c3 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System; +using System.Collections; using System.Collections.Generic; using System.Linq; using System.Reflection; @@ -14,512 +15,600 @@ using Newtonsoft.Json.Linq; namespace Microsoft.Spark.CSharp.Sql { - /// - /// The base type of all Spark SQL data types. - /// - [Serializable] - public abstract class DataType - { - /// - /// Trim "Type" in the end from class name, ToLower() to align with Scala. - /// - public string TypeName - { - get { return NormalizeTypeName(GetType().Name); } - } + /// + /// The base type of all Spark SQL data types. + /// + [Serializable] + public abstract class DataType + { + /// + /// Trim "Type" in the end from class name, ToLower() to align with Scala. + /// + public string TypeName + { + get { return NormalizeTypeName(GetType().Name); } + } - /// - /// return TypeName by default, subclass can override it - /// - public virtual string SimpleString - { - get { return TypeName; } - } + /// + /// return TypeName by default, subclass can override it + /// + public virtual string SimpleString + { + get { return TypeName; } + } - /// - /// return only type: TypeName by default, subclass can override it - /// - internal virtual object JsonValue { get { return TypeName; } } + /// + /// return only type: TypeName by default, subclass can override it + /// + internal virtual object JsonValue { get { return TypeName; } } - /// - /// The compact JSON representation of this data type. - /// - public string Json - { - get - { - var jObject = JsonValue is JObject ? ((JObject)JsonValue).SortProperties() : JsonValue; - return JsonConvert.SerializeObject(jObject, Formatting.None); - } - } + /// + /// The compact JSON representation of this data type. + /// + public string Json + { + get + { + var jObject = JsonValue is JObject ? ((JObject)JsonValue).SortProperties() : JsonValue; + return JsonConvert.SerializeObject(jObject, Formatting.None); + } + } - /// - /// Parses a Json string to construct a DataType. 
- /// - /// The Json string to be parsed - /// The new DataType instance from the Json string - public static DataType ParseDataTypeFromJson(string json) - { - return ParseDataTypeFromJson(JToken.Parse(json)); - } + /// + /// Parses a Json string to construct a DataType. + /// + /// The Json string to be parsed + /// The new DataType instance from the Json string + public static DataType ParseDataTypeFromJson(string json) + { + return ParseDataTypeFromJson(JToken.Parse(json)); + } - /// - /// Parse a JToken object to construct a DataType. - /// - /// The JToken object to be parsed - /// The new DataType instance from the Json string - /// Not implemented for "udt" type - /// - protected static DataType ParseDataTypeFromJson(JToken json) - { - if (json.Type == JTokenType.Object) // {name: address, type: {type: struct,...},...} - { - JToken type; - var typeJObject = (JObject)json; - if (typeJObject.TryGetValue("type", out type)) - { - Type complexType; - if ((complexType = ComplexTypes.FirstOrDefault(ct => NormalizeTypeName(ct.Name) == type.ToString())) != default(Type)) - { - return ((ComplexType)Activator.CreateInstance(complexType, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance - , null, new object[] { typeJObject }, null)); // create new instance of ComplexType - } - if (type.ToString() == "udt") - { - // TODO - throw new NotImplementedException(); - } - } - throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); - } - else // {name: age, type: bigint,...} // TODO: validate more JTokenType other than Object - { - return ParseAtomicType(json); - } + /// + /// Parse a JToken object to construct a DataType. + /// + /// The JToken object to be parsed + /// The new DataType instance from the Json string + /// Not implemented for "udt" type + /// + protected static DataType ParseDataTypeFromJson(JToken json) + { + if (json.Type == JTokenType.Object) // {name: address, type: {type: struct,...},...} + { + JToken type; + var typeJObject = (JObject)json; + if (typeJObject.TryGetValue("type", out type)) + { + Type complexType; + if ((complexType = ComplexTypes.FirstOrDefault(ct => NormalizeTypeName(ct.Name) == type.ToString())) != default(Type)) + { + return ((ComplexType)Activator.CreateInstance(complexType, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance + , null, new object[] { typeJObject }, null)); // create new instance of ComplexType + } + if (type.ToString() == "udt") + { + // TODO + throw new NotImplementedException(); + } + } + throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); + } + else // {name: age, type: bigint,...} // TODO: validate more JTokenType other than Object + { + return ParseAtomicType(json); + } - } + } - private static AtomicType ParseAtomicType(JToken type) - { - Type atomicType; - if ((atomicType = AtomicTypes.FirstOrDefault(at => NormalizeTypeName(at.Name) == type.ToString())) != default(Type)) - { - return (AtomicType)Activator.CreateInstance(atomicType); // create new instance of AtomicType - } + private static AtomicType ParseAtomicType(JToken type) + { + Type atomicType; + if ((atomicType = AtomicTypes.FirstOrDefault(at => NormalizeTypeName(at.Name) == type.ToString())) != default(Type)) + { + return (AtomicType)Activator.CreateInstance(atomicType); // create new instance of AtomicType + } - Match fixedDecimal = DecimalType.FixedDecimal.Match(type.ToString()); - if (fixedDecimal.Success) - { - return new DecimalType(int.Parse(fixedDecimal.Groups[1].Value), 
int.Parse(fixedDecimal.Groups[2].Value)); - } + Match fixedDecimal = DecimalType.FixedDecimal.Match(type.ToString()); + if (fixedDecimal.Success) + { + return new DecimalType(int.Parse(fixedDecimal.Groups[1].Value), int.Parse(fixedDecimal.Groups[2].Value)); + } - throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); - } + throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); + } - [NonSerialized] - private static readonly Type[] AtomicTypes = typeof(AtomicType).Assembly.GetTypes().Where(type => - type.IsSubclassOf(typeof(AtomicType))).ToArray(); + [NonSerialized] + private static readonly Type[] AtomicTypes = typeof(AtomicType).Assembly.GetTypes().Where(type => + type.IsSubclassOf(typeof(AtomicType))).ToArray(); - [NonSerialized] - private static readonly Type[] ComplexTypes = typeof(ComplexType).Assembly.GetTypes().Where(type => - type.IsSubclassOf(typeof(ComplexType))).ToArray(); + [NonSerialized] + private static readonly Type[] ComplexTypes = typeof(ComplexType).Assembly.GetTypes().Where(type => + type.IsSubclassOf(typeof(ComplexType))).ToArray(); - [NonSerialized] - private static readonly Func NormalizeTypeName = s => s.Substring(0, s.Length - 4).ToLower(); // trim "Type" at the end of type name + [NonSerialized] + private static readonly Func NormalizeTypeName = s => s.Substring(0, s.Length - 4).ToLower(); // trim "Type" at the end of type name - } + } - /// - /// An internal type used to represent a simple type. - /// - [Serializable] - public class AtomicType : DataType - { - } + /// + /// An internal type used to represent a simple type. + /// + [Serializable] + public class AtomicType : DataType + { + } - /// - /// An internal type used to represent a complex type (such as arrays, structs, and maps). - /// - [Serializable] - public abstract class ComplexType : DataType - { - /// - /// Abstract method that constructs a complex type from a Json object - /// - /// The Json object to construct a complex type - /// A new constructed complex type - public abstract DataType FromJson(JObject json); - /// - /// Constructs a complex type from a Json string - /// - /// The string that represents a Json. - /// A new constructed complex type - public DataType FromJson(string json) - { - return FromJson(JObject.Parse(json)); - } - } + /// + /// An internal type used to represent a complex type (such as arrays, structs, and maps). + /// + [Serializable] + public abstract class ComplexType : DataType + { + /// + /// Abstract method that constructs a complex type from a Json object + /// + /// The Json object to construct a complex type + /// A new constructed complex type + public abstract DataType FromJson(JObject json); + /// + /// Constructs a complex type from a Json string + /// + /// The string that represents a Json. + /// A new constructed complex type + public DataType FromJson(string json) + { + return FromJson(JObject.Parse(json)); + } + } - /// - /// The data type representing NULL values. - /// - [Serializable] - public class NullType : AtomicType { } + /// + /// The data type representing NULL values. + /// + [Serializable] + public class NullType : AtomicType { } - /// - /// The data type representing String values. - /// - [Serializable] - public class StringType : AtomicType { } + /// + /// The data type representing String values. + /// + [Serializable] + public class StringType : AtomicType { } - /// - /// The data type representing binary values. 
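To illustrate the parsing path above, a hedged sketch; the decimal case relies on the relaxed DecimalType.FixedDecimal regex introduced in this patch, which no longer requires a space after the comma:

    // assumes: using Microsoft.Spark.CSharp.Sql;
    // Plain atomic type names resolve to the matching AtomicType subclass,
    // while fixed-precision decimals go through DecimalType.FixedDecimal.
    DataType intType = DataType.ParseDataTypeFromJson("\"integer\"");       // IntegerType
    DataType decType = DataType.ParseDataTypeFromJson("\"decimal(10,2)\""); // DecimalType(10, 2)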
- /// - [Serializable] - public class BinaryType : AtomicType { } + /// + /// The data type representing binary values. + /// + [Serializable] + public class BinaryType : AtomicType { } - /// - /// The data type representing Boolean values. - /// - [Serializable] - public class BooleanType : AtomicType { } + /// + /// The data type representing Boolean values. + /// + [Serializable] + public class BooleanType : AtomicType { } - /// - /// The data type representing Date values. - /// - [Serializable] - public class DateType : AtomicType { } + /// + /// The data type representing Date values. + /// + [Serializable] + public class DateType : AtomicType { } - /// - /// The data type representing Timestamp values. - /// - [Serializable] - public class TimestampType : AtomicType { } + /// + /// The data type representing Timestamp values. + /// + [Serializable] + public class TimestampType : AtomicType { } - /// - /// The data type representing Double values. - /// - [Serializable] - public class DoubleType : AtomicType { } + /// + /// The data type representing Double values. + /// + [Serializable] + public class DoubleType : AtomicType { } - /// - /// - /// - [Serializable] - public class FloatType : AtomicType { } + /// + /// + /// + [Serializable] + public class FloatType : AtomicType { } - /// - /// The data type representing Float values. - /// - [Serializable] - public class ByteType : AtomicType { } + /// + /// The data type representing Float values. + /// + [Serializable] + public class ByteType : AtomicType { } - /// - /// - /// - [Serializable] - public class IntegerType : AtomicType { } + /// + /// + /// + [Serializable] + public class IntegerType : AtomicType { } - /// - /// The data type representing Int values. - /// - [Serializable] - public class LongType : AtomicType { } + /// + /// The data type representing Int values. + /// + [Serializable] + public class LongType : AtomicType { } - /// - /// The data type representing Short values. - /// - [Serializable] - public class ShortType : AtomicType { } + /// + /// The data type representing Short values. + /// + [Serializable] + public class ShortType : AtomicType { } - /// - /// The data type representing Decimal values. - /// - [Serializable] - public class DecimalType : AtomicType - { - /// - /// Gets the regular expression that represents a fixed decimal. - /// - public static Regex FixedDecimal = new Regex(@"decimal\((\d+),\s(\d+)\)"); - private int? precision, scale; - /// - /// Initializes a new instance of DecimalType from parameters specifying its precision and scale. - /// - /// The precision of the type - /// The scale of the type - public DecimalType(int? precision = null, int? scale = null) - { - this.precision = precision; - this.scale = scale; - } + /// + /// The data type representing Decimal values. + /// + [Serializable] + public class DecimalType : AtomicType + { + /// + /// Gets the regular expression that represents a fixed decimal. + /// + public static Regex FixedDecimal = new Regex(@"decimal\s*\((\d+),\s*(\d+)\)"); + private int? precision, scale; + /// + /// Initializes a new instance of DecimalType from parameters specifying its precision and scale. + /// + /// The precision of the type + /// The scale of the type + public DecimalType(int? precision = null, int? 
scale = null) + { + this.precision = precision; + this.scale = scale; + } - internal override object JsonValue - { - get { throw new NotImplementedException(); } - } + internal override object JsonValue + { + get + { + if (precision == null && scale == null) return "decimal"; + return "decimal(" + precision + "," + scale + ")"; + } + } - /// - /// Constructs a DecimalType from a Json object - /// - /// The Json object used to construct a DecimalType - /// A new DecimalType instance - /// Not implemented yet. - public DataType FromJson(JObject json) - { - throw new NotImplementedException(); - } - } + /// + /// Constructs a DecimalType from a Json object + /// + /// The Json object used to construct a DecimalType + /// A new DecimalType instance + /// Not implemented yet. + public DataType FromJson(JObject json) + { + return ParseDataTypeFromJson(json); + } + } - /// - /// The data type for collections of multiple values. - /// - [Serializable] - public class ArrayType : ComplexType - { - /// - /// Gets the DataType of each element in the array - /// - public DataType ElementType { get { return elementType; } } - /// - /// Returns whether the array can contain null (None) values - /// - public bool ContainsNull { get { return containsNull; } } + /// + /// The data type for collections of multiple values. + /// + [Serializable] + public class ArrayType : ComplexType + { + /// + /// Gets the DataType of each element in the array + /// + public DataType ElementType { get { return elementType; } } + /// + /// Returns whether the array can contain null (None) values + /// + public bool ContainsNull { get { return containsNull; } } - /// - /// Initializes a ArrayType instance with a specific DataType and specifying if the array has null values. - /// - /// The data type of values - /// Indicates if values have null values - public ArrayType(DataType elementType, bool containsNull = true) - { - this.elementType = elementType; - this.containsNull = containsNull; - } + /// + /// Initializes a ArrayType instance with a specific DataType and specifying if the array has null values. + /// + /// The data type of values + /// Indicates if values have null values + public ArrayType(DataType elementType, bool containsNull = true) + { + this.elementType = elementType; + this.containsNull = containsNull; + } - internal ArrayType(JObject json) - { - FromJson(json); - } + internal ArrayType(JObject json) + { + FromJson(json); + } - /// - /// Readable string representation for the type. - /// - public override string SimpleString - { - get { return string.Format("array<{0}>", elementType.SimpleString); } - } + /// + /// Readable string representation for the type. 
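A brief illustration of the JsonValue behaviour added above (a sketch; variable names are illustrative):

    // assumes: using System; using Microsoft.Spark.CSharp.Sql;
    var unbounded = new DecimalType();     // JsonValue -> "decimal"
    var money = new DecimalType(10, 2);    // JsonValue -> "decimal(10,2)"
    Console.WriteLine(money.Json);         // prints the same value serialized as a JSON string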
+ /// + public override string SimpleString + { + get { return string.Format("array<{0}>", elementType.SimpleString); } + } - internal override object JsonValue - { - get - { - return new JObject( - new JProperty("type", TypeName), - new JProperty("elementType", elementType.JsonValue), - new JProperty("containsNull", containsNull)); - } - } + internal override object JsonValue + { + get + { + return new JObject( + new JProperty("type", TypeName), + new JProperty("elementType", elementType.JsonValue), + new JProperty("containsNull", containsNull)); + } + } - /// - /// Constructs a ArrayType from a Json object - /// - /// The Json object used to construct a ArrayType - /// A new ArrayType instance - public override sealed DataType FromJson(JObject json) - { - elementType = ParseDataTypeFromJson(json["elementType"]); - containsNull = (bool)json["containsNull"]; - return this; - } + /// + /// Constructs a ArrayType from a Json object + /// + /// The Json object used to construct a ArrayType + /// A new ArrayType instance + public override sealed DataType FromJson(JObject json) + { + elementType = ParseDataTypeFromJson(json["elementType"]); + containsNull = (bool)json["containsNull"]; + return this; + } - private DataType elementType; - private bool containsNull; - } + private DataType elementType; + private bool containsNull; + } - /// - /// The data type for Maps. Not implemented yet. - /// - [Serializable] - public class MapType : ComplexType - { - internal override object JsonValue - { - get { throw new NotImplementedException(); } - } + /// + /// The data type for Maps. Not implemented yet. + /// + [Serializable] + public class MapType : ComplexType + { + internal override object JsonValue + { + get { throw new NotImplementedException(); } + } - /// - /// Constructs a StructField from a Json object. Not implemented yet. - /// - /// The Json object used to construct a MapType - /// A new MapType instance - /// - public override DataType FromJson(JObject json) - { - throw new NotImplementedException(); - } - } + /// + /// Constructs a StructField from a Json object. Not implemented yet. + /// + /// The Json object used to construct a MapType + /// A new MapType instance + /// + public override DataType FromJson(JObject json) + { + throw new NotImplementedException(); + } + } - /// - /// A field inside a StructType. - /// - [Serializable] - public class StructField : ComplexType - { - /// - /// The name of this field. - /// - public string Name { get { return name; } } - /// - /// The data type of this field. - /// - public DataType DataType { get { return dataType; } } - /// - /// Indicates if values of this field can be null values. - /// - public bool IsNullable { get { return isNullable; } } - /// - /// The metadata of this field. The metadata should be preserved during transformation if the content of the column is not modified, e.g, in selection. - /// - public JObject Metadata { get { return metadata; } } + /// + /// A field inside a StructType. + /// + [Serializable] + public class StructField : ComplexType + { + /// + /// The name of this field. + /// + public string Name { get { return name; } } + /// + /// The data type of this field. + /// + public DataType DataType { get { return dataType; } } + /// + /// Indicates if values of this field can be null values. + /// + public bool IsNullable { get { return isNullable; } } + /// + /// The metadata of this field. The metadata should be preserved during transformation if the content of the column is not modified, e.g, in selection. 
+ /// + public JObject Metadata { get { return metadata; } } - /// - /// Initializes a StructField instance with a specific name, data type, nullable, and metadata - /// - /// The name of this field - /// The data type of this field - /// Indicates if values of this field can be null values - /// The metadata of this field - public StructField(string name, DataType dataType, bool isNullable = true, JObject metadata = null) - { - this.name = name; - this.dataType = dataType; - this.isNullable = isNullable; - this.metadata = metadata ?? new JObject(); - } + /// + /// Initializes a StructField instance with a specific name, data type, nullable, and metadata + /// + /// The name of this field + /// The data type of this field + /// Indicates if values of this field can be null values + /// The metadata of this field + public StructField(string name, DataType dataType, bool isNullable = true, JObject metadata = null) + { + this.name = name; + this.dataType = dataType; + this.isNullable = isNullable; + this.metadata = metadata ?? new JObject(); + } - internal StructField(JObject json) - { - FromJson(json); - } + internal StructField(JObject json) + { + FromJson(json); + } - /// - /// Returns a readable string that represents the type. - /// - public override string SimpleString { get { return string.Format(@"{0}:{1}", name, dataType.SimpleString); } } + /// + /// Returns a readable string that represents the type. + /// + public override string SimpleString { get { return string.Format(@"{0}:{1}", name, dataType.SimpleString); } } - internal override object JsonValue - { - get - { - return new JObject( - new JProperty("name", name), - new JProperty("type", dataType.JsonValue), - new JProperty("nullable", isNullable), - new JProperty("metadata", metadata)); - } - } + internal override object JsonValue + { + get + { + return new JObject( + new JProperty("name", name), + new JProperty("type", dataType.JsonValue), + new JProperty("nullable", isNullable), + new JProperty("metadata", metadata)); + } + } - /// - /// Constructs a StructField from a Json object - /// - /// The Json object used to construct a StructField - /// A new StructField instance - public override sealed DataType FromJson(JObject json) - { - name = json["name"].ToString(); - dataType = ParseDataTypeFromJson(json["type"]); - isNullable = (bool)json["nullable"]; - metadata = (JObject)json["metadata"]; - return this; - } + /// + /// Constructs a StructField from a Json object + /// + /// The Json object used to construct a StructField + /// A new StructField instance + public override sealed DataType FromJson(JObject json) + { + name = json["name"].ToString(); + dataType = ParseDataTypeFromJson(json["type"]); + isNullable = (bool)json["nullable"]; + metadata = (JObject)json["metadata"]; + return this; + } - private string name; - private DataType dataType; - private bool isNullable; - [NonSerialized] - private JObject metadata; - } + private string name; + private DataType dataType; + private bool isNullable; + [NonSerialized] + private JObject metadata; + } - /// - /// Struct type, consisting of a list of StructField - /// This is the data type representing a Row - /// - [Serializable] - public class StructType : ComplexType - { - /// - /// Gets a list of StructField. - /// - public List Fields { get { return fields; } } + /// + /// Struct type, consisting of a list of StructField + /// This is the data type representing a Row + /// + [Serializable] + public class StructType : ComplexType + { + /// + /// Gets a list of StructField. 
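As a quick sanity check on the string and JSON representations defined here, a sketch with illustrative field names:

    // assumes: using Microsoft.Spark.CSharp.Sql;
    var schema = new StructType(new[]
    {
        new StructField("id", new LongType()),
        new StructField("tags", new ArrayType(new StringType()))
    });
    // schema.SimpleString -> "struct<id:long,tags:array<string>>"
    // Round-tripping the compact JSON form rebuilds an equivalent schema:
    var restored = (StructType)DataType.ParseDataTypeFromJson(schema.Json);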
+ /// + public List Fields { get { return fields; } } - internal IStructTypeProxy StructTypeProxy - { - get - { - return structTypeProxy ?? - new StructTypeIpcProxy( - new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "createSchema", - new object[] { Json }).ToString())); - } - } - /// - /// Initializes a StructType instance with a specific collection of SructField object. - /// - /// The collection that holds StructField objects - public StructType(IEnumerable fields) - { - this.fields = fields.ToList(); - } + private Lazy[]> pickleConverters; - internal StructType(JObject json) - { - FromJson(json); - } + private Func[] ConstructPickleConverters() + { + var funcs = new Func[fields.Count]; + int index = 0; + foreach (var field in fields) + { + if (field.DataType is StringType) + { + funcs[index] = x => x?.ToString(); + } + /*else if (field.DataType is LongType) + { + funcs[index] = x => x==null?null:(dynamic)(long)x ; + }*/ + /*else if (field.DataType is DateType) + { + funcs[index] = x => x; + }*/ + else if (field.DataType is ArrayType) + { + Func convertArrayTypeToStructTypeFunc = (dataType, length) => + { + StructField[] f = new StructField[length]; + for (int i = 0; i < length; i++) + { + f[i] = new StructField(string.Format("_array_{0}", i), dataType); + } + return new StructType(f); + }; + var elementType = (field.DataType as ArrayType).ElementType; + funcs[index] = x => + { - internal StructType(IStructTypeProxy structTypeProxy) - { - this.structTypeProxy = structTypeProxy; - var jsonSchema = structTypeProxy.ToJson(); - FromJson(jsonSchema); - } + // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)), + // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList; + // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[]. + object[] valueOfArray = (x as ArrayList)?.ToArray() ?? x as object[]; + if (valueOfArray == null) + { + throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name); + } - /// - /// Returns a readable string that joins all s together. - /// - public override string SimpleString - { - get { return string.Format(@"struct<{0}>", string.Join(",", fields.Select(f => f.SimpleString))); } - } + return new RowImpl(valueOfArray, + elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).Values; // TODO: this part may have some problems, not verified + }; + } + else if (field.DataType is MapType) + { + //TODO + throw new NotImplementedException(); + } + else if (field.DataType is StructType) + { + funcs[index] = x => x != null ? new RowImpl(x, field.DataType as StructType) : null; + } + else + { + funcs[index] = x => x; + } + index++; + } + return funcs; + } - internal override object JsonValue - { - get - { - return new JObject( - new JProperty("type", TypeName), - new JProperty("fields", fields.Select(f => f.JsonValue).ToArray())); - } - } + internal IStructTypeProxy StructTypeProxy + { + get + { + return structTypeProxy ?? 
+ new StructTypeIpcProxy( + new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "createSchema", + new object[] { Json }).ToString())); + } + } - /// - /// Constructs a StructType from a Json object - /// - /// The Json object used to construct a StructType - /// A new StructType instance - public override sealed DataType FromJson(JObject json) - { - var fieldsJObjects = json["fields"].Select(f => (JObject)f); - fields = fieldsJObjects.Select(fieldJObject => (new StructField(fieldJObject))).ToList(); - return this; - } + /// + /// Initializes a StructType instance with a specific collection of SructField object. + /// + /// The collection that holds StructField objects + public StructType(IEnumerable fields) + { + this.fields = fields.ToList(); + Initialize(); + } - [NonSerialized] - private readonly IStructTypeProxy structTypeProxy; + internal StructType(JObject json) + { + FromJson(json); + Initialize(); + } - private List fields; - } + internal StructType(IStructTypeProxy structTypeProxy) + { + this.structTypeProxy = structTypeProxy; + var jsonSchema = structTypeProxy.ToJson(); + FromJson(jsonSchema); + Initialize(); + } + + public void ConvertPickleObjects(dynamic[] input, dynamic[] output) + { + var c = pickleConverters.Value; + for (int i = 0; i < input.Length; ++i) + { + output[i] = c[i](input[i]); + } + } + + private void Initialize() + { + pickleConverters = new Lazy[]>(ConstructPickleConverters); + } + + /// + /// Returns a readable string that joins all s together. + /// + public override string SimpleString + { + get { return string.Format(@"struct<{0}>", string.Join(",", fields.Select(f => f.SimpleString))); } + } + + internal override object JsonValue + { + get + { + return new JObject( + new JProperty("type", TypeName), + new JProperty("fields", fields.Select(f => f.JsonValue).ToArray())); + } + } + + /// + /// Constructs a StructType from a Json object + /// + /// The Json object used to construct a StructType + /// A new StructType instance + public override sealed DataType FromJson(JObject json) + { + var fieldsJObjects = json["fields"].Select(f => (JObject)f); + fields = fieldsJObjects.Select(fieldJObject => (new StructField(fieldJObject))).ToList(); + return this; + } + + [NonSerialized] + private readonly IStructTypeProxy structTypeProxy; + + private List fields; + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs index b9c5008..eaa602b 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Linq; +using System.Reflection; using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; @@ -249,6 +250,17 @@ namespace Microsoft.Spark.CSharp.Sql Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); } - #endregion - } + + public void RegisterFunction(string name, MethodInfo f) + { + if (!f.IsStatic) + throw new InvalidOperationException(f.DeclaringType?.FullName + "." 
+ f.Name + + " is not a static method, can't be registered"); + logger.LogInfo("Name of the function to register {0}, method info", name, f.DeclaringType?.FullName + "." + f.Name); + var helper = new UdfReflectionHelper(f); + Func, IEnumerable> udfHelper = helper.Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType)); + } + #endregion + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/packages.config b/csharp/Adapter/Microsoft.Spark.CSharp/packages.config index 8f5143e..d95f59d 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/packages.config +++ b/csharp/Adapter/Microsoft.Spark.CSharp/packages.config @@ -1,7 +1,7 @@  - - + + - + \ No newline at end of file diff --git a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML index 0d192a5..f7d5b48 100644 --- a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML +++ b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML @@ -3513,7 +3513,7 @@ Close the socket connections and releases all associated resources. - + Establishes a connection to a remote host that is specified by an IP address and a port number @@ -3612,12 +3612,13 @@ Close the ISocket connections and releases all associated resources. - + Establishes a connection to a remote host that is specified by an IP address and a port number The IP address of the remote host The port number of the remote host + The secret to connect, can be null @@ -3770,7 +3771,7 @@ Close the ISocket connections and releases all associated resources. - + Establishes a connection to a remote host that is specified by an IP address and a port number @@ -3912,7 +3913,7 @@ Close the ISocket connections and releases all associated resources. - + Establishes a connection to a remote host that is specified by an IP address and a port number @@ -5190,12 +5191,13 @@ row count - + Displays rows of the DataFrame in tabular form Number of rows to display - default 20 Indicates if strings more than 20 characters long will be truncated + If set to True, print output rows vertically (one line per column value). @@ -5627,10 +5629,11 @@ the 100 new partitions will claim 10 of the current partitions. - + Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`) + Persist storage type @@ -6040,6 +6043,15 @@ DataFrame if no paths are passed in. + + + Loads a AVRO file (one object per line) and returns the result as a DataFrame. + + This function goes through the input once to determine the input schema. If you know the + schema in advance, use the version that specifies the schema to avoid the extra scan. + + input path + Interface used to write a DataFrame to external storage systems (e.g. file systems, @@ -6145,6 +6157,13 @@ Format("parquet").Save(path) + + + Saves the content of the DataFrame in AVRO format at the specified path. + This is equivalent to: + Format("com.databricks.spark.avro").Save(path) + + Dataset is a strongly typed collection of domain-specific objects that can be transformed @@ -6193,13 +6212,14 @@ Returns all column names as an array. - + Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters will be truncated, and all cells will be aligned right. 
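For completeness, a hedged sketch of registering a UDF through the new MethodInfo overload; the method must be public static, and the class, names and the Sql(...) query are illustrative only:

    // assumes: using System.Reflection; using Microsoft.Spark.CSharp.Sql;
    public static class MyUdfs
    {
        public static int StrLen(string s) { return s == null ? 0 : s.Length; }
    }

    // elsewhere, with an existing SparkSession spark:
    MethodInfo strLen = typeof(MyUdfs).GetMethod("StrLen");
    spark.Udf.RegisterFunction("strLen", strLen);  // non-static methods throw InvalidOperationException
    DataFrame lengths = spark.Sql("SELECT strLen(name) FROM people");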
Number of rows - default is 20 Indicates if rows with more than 20 characters to be truncated + If set to true, prints output rows vertically (one line per column value). diff --git a/csharp/Adapter/documentation/Mobius_API_Documentation.md b/csharp/Adapter/documentation/Mobius_API_Documentation.md index c9e4065..7ee0e9e 100644 --- a/csharp/Adapter/documentation/Mobius_API_Documentation.md +++ b/csharp/Adapter/documentation/Mobius_API_Documentation.md @@ -638,7 +638,7 @@ ####Methods -
Name - Description
RegisterTempTable - Registers this DataFrame as a temporary table using the given name. The lifetime of this temporary table is tied to the SqlContext that was used to create this DataFrame.
Count - Number of rows in the DataFrame
Show - Displays rows of the DataFrame in tabular form
ShowSchema - Prints the schema information of the DataFrame
Collect - Returns all Rows in this DataFrame
ToRDD - Converts the DataFrame to RDD of Row
ToJSON - Returns the content of the DataFrame as RDD of JSON strings
Explain - Prints the plans (logical and physical) to the console for debugging purposes
Select - Selects a set of columns specified by column name or Column. df.Select("colA", df["colB"]) df.Select("*", df["colB"] + 10)
Select - Selects a set of columns. This is a variant of `select` that can only select existing columns using column names (i.e. cannot construct expressions). df.Select("colA", "colB")
SelectExpr - Selects a set of SQL expressions. This is a variant of `select` that accepts SQL expressions. df.SelectExpr("colA", "colB as newName", "abs(colC)")
Where - Filters rows using the given condition
Filter - Filters rows using the given condition
GroupBy - Groups the DataFrame using the specified columns, so we can run aggregation on them.
Rollup - Create a multi-dimensional rollup for the current DataFrame using the specified columns, so we can run aggregation on them.
Cube - Create a multi-dimensional cube for the current DataFrame using the specified columns, so we can run aggregation on them.
Agg - Aggregates on the DataFrame for the given column-aggregate function mapping
Join - Join with another DataFrame - Cartesian join
Join - Join with another DataFrame - Inner equi-join using given column name
Join - Join with another DataFrame - Inner equi-join using given column name
Join - Join with another DataFrame, using the specified JoinType
Intersect - Intersect with another DataFrame. This is equivalent to `INTERSECT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, intersect(self, other)
UnionAll - Union with another DataFrame WITHOUT removing duplicated rows. This is equivalent to `UNION ALL` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, unionAll(self, other)
Subtract - Returns a new DataFrame containing rows in this frame but not in another frame. This is equivalent to `EXCEPT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, subtract(self, other)
Drop - Returns a new DataFrame with a column dropped. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, drop(self, col)
DropNa - Returns a new DataFrame omitting rows with null values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropna(self, how='any', thresh=None, subset=None)
Na - Returns a DataFrameNaFunctions for working with missing data.
FillNa - Replace null values, alias for `na.fill()`
DropDuplicates - Returns a new DataFrame with duplicate rows removed, considering only the subset of columns. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropDuplicates(self, subset=None)
Replace``1 - Returns a new DataFrame replacing a value with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1 - Returns a new DataFrame replacing values with other values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1 - Returns a new DataFrame replacing values with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
RandomSplit - Randomly splits this DataFrame with the provided weights. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, randomSplit(self, weights, seed=None)
Columns - Returns all column names as a list. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, columns(self)
DTypes - Returns all column names and their data types. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dtypes(self)
Sort - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
Sort - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
SortWithinPartitions - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
SortWithinPartition - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
Alias - Returns a new DataFrame with an alias set. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, alias(self, alias)
WithColumn - Returns a new DataFrame by adding a column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumn(self, colName, col)
WithColumnRenamed - Returns a new DataFrame by renaming an existing column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumnRenamed(self, existing, new)
Corr - Calculates the correlation of two columns of a DataFrame as a double value. Currently only supports the Pearson Correlation Coefficient. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, corr(self, col1, col2, method=None)
Cov - Calculate the sample covariance of two columns as a double value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, cov(self, col1, col2)
FreqItems - Finding frequent items for columns, possibly with false positives. Using the frequent element count algorithm described in "http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, freqItems(self, cols, support=None) Note: This function is meant for exploratory data analysis, as we make no guarantee about the backward compatibility of the schema of the resulting DataFrame.
Crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency table. The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero pair frequencies will be returned. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, crosstab(self, col1, col2)
Describe - Computes statistics for numeric columns. This includes count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical columns.
Limit - Returns a new DataFrame by taking the first `n` rows. The difference between this function and `head` is that `head` returns an array while `limit` returns a new DataFrame.
Head - Returns the first `n` rows.
First - Returns the first row.
Take - Returns the first `n` rows in the DataFrame.
Distinct - Returns a new DataFrame that contains only the unique rows from this DataFrame.
Coalesce - Returns a new DataFrame that has exactly `numPartitions` partitions. Similar to coalesce defined on an RDD, this operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of the current partitions.
Persist - Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
Unpersist - Mark the DataFrame as non-persistent, and remove all blocks for it from memory and disk.
Cache - Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
Repartition - Returns a new DataFrame that has exactly `numPartitions` partitions.
Repartition - Returns a new [[DataFrame]] partitioned by the given partitioning columns into . The resulting DataFrame is hash partitioned. optional. If not specified, keep current partitions.
Repartition - Returns a new [[DataFrame]] partitioned by the given partitioning columns into . The resulting DataFrame is hash partitioned. optional. If not specified, keep current partitions.
Sample - Returns a new DataFrame by sampling a fraction of rows.
FlatMap``1 - Returns a new RDD by first applying a function to all rows of this DataFrame, and then flattening the results.
Map``1 - Returns a new RDD by applying a function to all rows of this DataFrame.
MapPartitions``1 - Returns a new RDD by applying a function to each partition of this DataFrame.
ForeachPartition - Applies a function f to each partition of this DataFrame.
Foreach - Applies a function f to all rows.
Write - Interface for saving the content of the DataFrame out into external storage.
SaveAsParquetFile - Saves the contents of this DataFrame as a parquet file, preserving the schema. Files that are written out using this method can be read back in as a DataFrame using the `parquetFile` function in SQLContext.
InsertInto - Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
SaveAsTable - Creates a table from the contents of this DataFrame based on a given data source, SaveMode specified by mode, and a set of options. Note that this currently only works with DataFrames that are created from a HiveContext as there is no notion of a persisted catalog in a standard SQL context. Instead you can write an RDD out to a parquet file, and then register that file as a table. This "table" can then be the target of an `insertInto`. Also note that while this function can persist the table metadata into Hive's metastore, the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
Save - Saves the contents of this DataFrame based on the given data source, SaveMode specified by mode, and a set of options.
Returns a new DataFrame that drops rows containing any null values.
Returns a new DataFrame that drops rows containing null values. If `how` is "any", then drop rows containing any null values. If `how` is "all", then drop rows only if every column is null for that row.
Returns a new [[DataFrame]] that drops rows containing null values in the specified columns. If `how` is "any", then drop rows containing any null values in the specified columns. If `how` is "all", then drop rows only if every specified column is null for that row.
Returns a new DataFrame that drops rows containing any null values in the specified columns.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values in the specified columns.
Returns a new DataFrame that replaces null values in numeric columns with `value`.
Returns a new DataFrame that replaces null values in string columns with `value`.
Returns a new DataFrame that replaces null values in specified numeric columns. If a specified column is not a numeric column, it is ignored.
Returns a new DataFrame that replaces null values in specified string columns. If a specified column is not a numeric column, it is ignored.
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. The value must be of the following type: `Integer`, `Long`, `Float`, `Double`, `String`. For example, the following replaces null values in column "A" with string "unknown", and null values in column "B" with numeric value 1.0. import com.google.common.collect.ImmutableMap; df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height". df.replace("height", ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". df.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. df.replace("*", ImmutableMap.of("UNKNOWN", "unnamed"));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". df.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". df.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed"));
Specifies the input data source format.
Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading.
Adds an input option for the underlying data source.
Adds input options for the underlying data source.
Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system).
Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores).
Construct a [[DataFrame]] representing the database table accessible via JDBC URL, url named table and connection properties.
Construct a DataFrame representing the database table accessible via JDBC URL url named table. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Construct a DataFrame representing the database table accessible via JDBC URL url named table using connection properties. The `predicates` parameter gives a list expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan.
Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in.
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the underlying output data source. Built-in options include "parquet", "json", etc.
Adds an output option for the underlying data source.
Adds output options for the underlying data source.
Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment.
Saves the content of the DataFrame at the specified path.
Saves the content of the DataFrame as the specified table.
Inserts the content of the DataFrame to the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data to an existing table, format or options will be ignored.
Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame need to be the same as that of the existing table, and format or options will be ignored.
Saves the content of the DataFrame to a external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path)
Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("parquet").Save(path)
+
Name - Description
RegisterTempTable - Registers this DataFrame as a temporary table using the given name. The lifetime of this temporary table is tied to the SqlContext that was used to create this DataFrame.
Count - Number of rows in the DataFrame
Show - Displays rows of the DataFrame in tabular form
ShowSchema - Prints the schema information of the DataFrame
Collect - Returns all Rows in this DataFrame
ToRDD - Converts the DataFrame to RDD of Row
ToJSON - Returns the content of the DataFrame as RDD of JSON strings
Explain - Prints the plans (logical and physical) to the console for debugging purposes
Select - Selects a set of columns specified by column name or Column. df.Select("colA", df["colB"]) df.Select("*", df["colB"] + 10)
Select - Selects a set of columns. This is a variant of `select` that can only select existing columns using column names (i.e. cannot construct expressions). df.Select("colA", "colB")
SelectExpr - Selects a set of SQL expressions. This is a variant of `select` that accepts SQL expressions. df.SelectExpr("colA", "colB as newName", "abs(colC)")
Where - Filters rows using the given condition
Filter - Filters rows using the given condition
GroupBy - Groups the DataFrame using the specified columns, so we can run aggregation on them.
Rollup - Create a multi-dimensional rollup for the current DataFrame using the specified columns, so we can run aggregation on them.
Cube - Create a multi-dimensional cube for the current DataFrame using the specified columns, so we can run aggregation on them.
Agg - Aggregates on the DataFrame for the given column-aggregate function mapping
Join - Join with another DataFrame - Cartesian join
Join - Join with another DataFrame - Inner equi-join using given column name
Join - Join with another DataFrame - Inner equi-join using given column name
Join - Join with another DataFrame, using the specified JoinType
Intersect - Intersect with another DataFrame. This is equivalent to `INTERSECT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, intersect(self, other)
UnionAll - Union with another DataFrame WITHOUT removing duplicated rows. This is equivalent to `UNION ALL` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, unionAll(self, other)
Subtract - Returns a new DataFrame containing rows in this frame but not in another frame. This is equivalent to `EXCEPT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, subtract(self, other)
Drop - Returns a new DataFrame with a column dropped. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, drop(self, col)
DropNa - Returns a new DataFrame omitting rows with null values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropna(self, how='any', thresh=None, subset=None)
Na - Returns a DataFrameNaFunctions for working with missing data.
FillNa - Replace null values, alias for `na.fill()`
DropDuplicates - Returns a new DataFrame with duplicate rows removed, considering only the subset of columns. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropDuplicates(self, subset=None)
Replace``1 - Returns a new DataFrame replacing a value with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1 - Returns a new DataFrame replacing values with other values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1 - Returns a new DataFrame replacing values with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
RandomSplit - Randomly splits this DataFrame with the provided weights. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, randomSplit(self, weights, seed=None)
Columns - Returns all column names as a list. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, columns(self)
DTypes - Returns all column names and their data types. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dtypes(self)
Sort - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
Sort - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
SortWithinPartitions - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
SortWithinPartition - Returns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
Alias - Returns a new DataFrame with an alias set. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, alias(self, alias)
WithColumn - Returns a new DataFrame by adding a column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumn(self, colName, col)
WithColumnRenamed - Returns a new DataFrame by renaming an existing column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumnRenamed(self, existing, new)
Corr - Calculates the correlation of two columns of a DataFrame as a double value. Currently only supports the Pearson Correlation Coefficient. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, corr(self, col1, col2, method=None)
Cov - Calculate the sample covariance of two columns as a double value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, cov(self, col1, col2)
FreqItems - Finding frequent items for columns, possibly with false positives. Using the frequent element count algorithm described in "http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, freqItems(self, cols, support=None) Note: This function is meant for exploratory data analysis, as we make no guarantee about the backward compatibility of the schema of the resulting DataFrame.
Crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency table. The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero pair frequencies will be returned. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, crosstab(self, col1, col2)
Describe - Computes statistics for numeric columns. This includes count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical columns.
Limit - Returns a new DataFrame by taking the first `n` rows. The difference between this function and `head` is that `head` returns an array while `limit` returns a new DataFrame.
Head - Returns the first `n` rows.
First - Returns the first row.
Take - Returns the first `n` rows in the DataFrame.
Distinct - Returns a new DataFrame that contains only the unique rows from this DataFrame.
Coalesce - Returns a new DataFrame that has exactly `numPartitions` partitions. Similar to coalesce defined on an RDD, this operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of the current partitions.
Persist - Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
Unpersist - Mark the DataFrame as non-persistent, and remove all blocks for it from memory and disk.
Cache - Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
Repartition - Returns a new DataFrame that has exactly `numPartitions` partitions.
Repartition - Returns a new [[DataFrame]] partitioned by the given partitioning columns into . The resulting DataFrame is hash partitioned. optional. If not specified, keep current partitions.
Repartition - Returns a new [[DataFrame]] partitioned by the given partitioning columns into . The resulting DataFrame is hash partitioned. optional. If not specified, keep current partitions.
Sample - Returns a new DataFrame by sampling a fraction of rows.
FlatMap``1 - Returns a new RDD by first applying a function to all rows of this DataFrame, and then flattening the results.
Map``1Returns a new RDD by applying a function to all rows of this DataFrame.
MapPartitions``1Returns a new RDD by applying a function to each partition of this DataFrame.
ForeachPartitionApplies a function f to each partition of this DataFrame.
ForeachApplies a function f to all rows.
WriteInterface for saving the content of the DataFrame out into external storage.
SaveAsParquetFileSaves the contents of this DataFrame as a parquet file, preserving the schema. Files that are written out using this method can be read back in as a DataFrame using the `parquetFile` function in SQLContext.
InsertIntoAdds the rows from this RDD to the specified table, optionally overwriting the existing data.
SaveAsTableCreates a table from the the contents of this DataFrame based on a given data source, SaveMode specified by mode, and a set of options. Note that this currently only works with DataFrames that are created from a HiveContext as there is no notion of a persisted catalog in a standard SQL context. Instead you can write an RDD out to a parquet file, and then register that file as a table. This "table" can then be the target of an `insertInto`. Also note that while this function can persist the table metadata into Hive's metastore, the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
SaveSaves the contents of this DataFrame based on the given data source, SaveMode specified by mode, and a set of options.
Returns a new DataFrame that drops rows containing any null values.
Returns a new DataFrame that drops rows containing null values. If `how` is "any", then drop rows containing any null values. If `how` is "all", then drop rows only if every column is null for that row.
Returns a new [[DataFrame]] that drops rows containing null values in the specified columns. If `how` is "any", then drop rows containing any null values in the specified columns. If `how` is "all", then drop rows only if every specified column is null for that row.
Returns a new DataFrame that drops rows containing any null values in the specified columns.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values in the specified columns.
Returns a new DataFrame that replaces null values in numeric columns with `value`.
Returns a new DataFrame that replaces null values in string columns with `value`.
Returns a new DataFrame that replaces null values in specified numeric columns. If a specified column is not a numeric column, it is ignored.
Returns a new DataFrame that replaces null values in specified string columns. If a specified column is not a string column, it is ignored.
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. The value must be of the following type: `Integer`, `Long`, `Float`, `Double`, `String`. For example, the following replaces null values in column "A" with string "unknown", and null values in column "B" with numeric value 1.0. import com.google.common.collect.ImmutableMap; df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height". df.replace("height", ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". df.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. df.replace("*", ImmutableMap.of("UNKNOWN", "unnamed"));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". df.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". df.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed"));
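A minimal C# sketch of the null-handling helpers described above. This is hedged: `DropNa` and `Na` appear in the method tables, but the exact overloads are assumed to mirror the pyspark `dropna(how, thresh, subset)` signature that the descriptions reference, and the column names are purely illustrative.

```csharp
// Hedged sketch only: overloads and column names are assumptions, not part of this change.
using Microsoft.Spark.CSharp.Sql;

internal static class NullHandlingSketch
{
    internal static DataFrame Clean(DataFrame df)
    {
        // Drop rows that contain any null value.
        var noNulls = df.DropNa();

        // Assumed overload: drop a row only when every listed column is null,
        // mirroring dropna(how='all', subset=[...]).
        var mostlyComplete = df.DropNa("all", null, new[] { "name", "age" });

        // Entry point to DataFrameNaFunctions; FillNa is documented above as an
        // alias for na.fill().
        var na = df.Na();

        return noNulls;
    }
}
```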
Specifies the input data source format.
Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading.
Adds an input option for the underlying data source.
Adds input options for the underlying data source.
Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system).
Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores).
Construct a [[DataFrame]] representing the database table accessible via JDBC URL, url named table and connection properties.
Construct a DataFrame representing the database table accessible via JDBC URL url named table. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Construct a DataFrame representing the database table accessible via JDBC URL url named table using connection properties. The `predicates` parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan.
Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in.
Loads an AVRO file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan.
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the underlying output data source. Built-in options include "parquet", "json", etc.
Adds an output option for the underlying data source.
Adds output options for the underlying data source.
Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment.
Saves the content of the DataFrame at the specified path.
Saves the content of the DataFrame as the specified table.
Inserts the content of the DataFrame into the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data into an existing table, format or options will be ignored.
Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame needs to be the same as that of the existing table, and format or options will be ignored.
Saves the content of the DataFrame to an external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path)
Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("parquet").Save(path)
Saves the content of the DataFrame in AVRO format at the specified path. This is equivalent to: Format("com.databricks.spark.avro").Save(path)
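Taken together, the read/transform/write surface above composes as in the following hedged C# sketch. The paths, column names, and join key are illustrative assumptions, while `Read().Json()`, `Show()`, and `Count()` follow the usage in the samples added elsewhere in this patch.

```csharp
// Illustrative composition of methods documented above; not part of this change.
using Microsoft.Spark.CSharp.Sql;

internal static class DataFrameOpsSketch
{
    internal static void Run(SqlContext sqlContext)
    {
        var people = sqlContext.Read().Json("hdfs:///tmp/people.json");
        var orders = sqlContext.Read().Json("hdfs:///tmp/orders.json");

        var joined = people.Join(orders, "id")   // inner equi-join using a shared column name
                           .DropDuplicates()     // remove duplicate rows
                           .Limit(100);          // still a DataFrame, unlike Head/Take

        joined.Show();                           // prints the first rows (20 by default)
        System.Console.WriteLine("row count: " + joined.Count());

        // Persist the result with its schema, as described for SaveAsParquetFile.
        joined.SaveAsParquetFile("hdfs:///tmp/joined.parquet");
    }
}
```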
--- @@ -688,7 +688,7 @@ ####Methods -
| Name | Description |
| ---- | ----------- |
| Format | Specifies the input data source format. |
| Schema | Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. |
| Option | Adds an input option for the underlying data source. |
| Options | Adds input options for the underlying data source. |
| Load | Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system). |
| Load | Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores). |
| Jdbc | Construct a [[DataFrame]] representing the database table accessible via JDBC URL, url named table and connection properties. |
| Jdbc | Construct a DataFrame representing the database table accessible via JDBC URL url named table. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Jdbc | Construct a DataFrame representing the database table accessible via JDBC URL url named table using connection properties. The `predicates` parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan. |
| Parquet | Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in. |
+
| Name | Description |
| ---- | ----------- |
| Format | Specifies the input data source format. |
| Schema | Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. |
| Option | Adds an input option for the underlying data source. |
| Options | Adds input options for the underlying data source. |
| Load | Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system). |
| Load | Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores). |
| Jdbc | Construct a [[DataFrame]] representing the database table accessible via JDBC URL, url named table and connection properties. |
| Jdbc | Construct a DataFrame representing the database table accessible via JDBC URL url named table. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Jdbc | Construct a DataFrame representing the database table accessible via JDBC URL url named table using connection properties. The `predicates` parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan. |
| Parquet | Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in. |
| Avro | Loads an AVRO file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan. |
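A hedged sketch of how the reader methods in the updated table are typically chained. The paths and the "samplingRatio" option are illustrative assumptions; `Avro` is the helper added by this change, backed by the `com.databricks.spark.avro` data source per the Avro descriptions in this patch.

```csharp
// Illustrative reader usage; option name and paths are assumptions.
using Microsoft.Spark.CSharp.Sql;

internal static class ReaderSketch
{
    internal static void Load(SqlContext sqlContext)
    {
        // Generic form: choose a format, add options, then Load() a path.
        DataFrame generic = sqlContext.Read()
                                      .Format("json")
                                      .Option("samplingRatio", "1.0")
                                      .Load("hdfs:///data/people.json");

        // Convenience helpers for common formats.
        DataFrame json = sqlContext.Read().Json("hdfs:///data/people.json");
        DataFrame parquet = sqlContext.Read().Parquet("hdfs:///data/people.parquet");

        // New in this change: Avro convenience helper.
        DataFrame avro = sqlContext.Read().Avro("hdfs:///data/people.avro");
    }
}
```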
--- @@ -705,7 +705,7 @@ ####Methods -
| Name | Description |
| ---- | ----------- |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Format | Specifies the underlying output data source. Built-in options include "parquet", "json", etc. |
| Option | Adds an output option for the underlying data source. |
| Options | Adds output options for the underlying data source. |
| PartitionBy | Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment. |
| Save | Saves the content of the DataFrame at the specified path. |
| Save | Saves the content of the DataFrame as the specified table. |
| InsertInto | Inserts the content of the DataFrame into the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data into an existing table, format or options will be ignored. |
| SaveAsTable | Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame needs to be the same as that of the existing table, and format or options will be ignored. |
| Jdbc | Saves the content of the DataFrame to an external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path) |
| Parquet | Saves the content of the DataFrame in Parquet format at the specified path. This is equivalent to: Format("parquet").Save(path) |
+
| Name | Description |
| ---- | ----------- |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Format | Specifies the underlying output data source. Built-in options include "parquet", "json", etc. |
| Option | Adds an output option for the underlying data source. |
| Options | Adds output options for the underlying data source. |
| PartitionBy | Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment. |
| Save | Saves the content of the DataFrame at the specified path. |
| Save | Saves the content of the DataFrame as the specified table. |
| InsertInto | Inserts the content of the DataFrame into the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data into an existing table, format or options will be ignored. |
| SaveAsTable | Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame needs to be the same as that of the existing table, and format or options will be ignored. |
| Jdbc | Saves the content of the DataFrame to an external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path) |
| Parquet | Saves the content of the DataFrame in Parquet format at the specified path. This is equivalent to: Format("parquet").Save(path) |
| Avro | Saves the content of the DataFrame in AVRO format at the specified path. This is equivalent to: Format("com.databricks.spark.avro").Save(path) |
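And a matching hedged sketch of the writer side. Output paths, the table layout, and the partition column are illustrative, and the `SaveMode` overload of `Mode` is assumed from the save-mode options listed above.

```csharp
// Illustrative writer usage; paths and the partition column are assumptions.
using Microsoft.Spark.CSharp.Sql;

internal static class WriterSketch
{
    internal static void Persist(DataFrame df)
    {
        // Generic form: format + save mode + layout, then Save(path).
        df.Write()
          .Format("parquet")
          .Mode(SaveMode.Overwrite)    // overwrite any existing data at the target
          .PartitionBy("year")         // Hive-style layout; Parquet only, per the table
          .Save("hdfs:///output/events");

        // Convenience helpers; Avro is equivalent to Format("com.databricks.spark.avro").Save(path).
        df.Write().Json("hdfs:///output/events-json");
        df.Write().Avro("hdfs:///output/events-avro");
    }
}
```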
--- diff --git a/csharp/AdapterTest/AccumulatorTest.cs b/csharp/AdapterTest/AccumulatorTest.cs index 24ccfb5..75fb938 100644 --- a/csharp/AdapterTest/AccumulatorTest.cs +++ b/csharp/AdapterTest/AccumulatorTest.cs @@ -33,7 +33,7 @@ namespace AdapterTest // get accumulator server port and connect to accumuator server int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort; sock = SocketFactory.CreateSocket(); - sock.Connect(IPAddress.Loopback, serverPort); + sock.Connect(IPAddress.Loopback, serverPort, null); } [TearDown] diff --git a/csharp/AdapterTest/AdapterTest.csproj b/csharp/AdapterTest/AdapterTest.csproj index c32ed7a..cbea547 100644 --- a/csharp/AdapterTest/AdapterTest.csproj +++ b/csharp/AdapterTest/AdapterTest.csproj @@ -35,22 +35,25 @@ 4 + + ..\packages\log4net.2.0.8\lib\net45-full\log4net.dll + ..\packages\Moq.4.2.1510.2205\lib\net40\Moq.dll True - - ..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll ..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll True - + ..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll - + ..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll diff --git a/csharp/AdapterTest/DataFrameTest.cs b/csharp/AdapterTest/DataFrameTest.cs index d54a9c3..34a6dfb 100644 --- a/csharp/AdapterTest/DataFrameTest.cs +++ b/csharp/AdapterTest/DataFrameTest.cs @@ -12,6 +12,7 @@ using Microsoft.Spark.CSharp.Sql; using Microsoft.Spark.CSharp.Proxy; using NUnit.Framework; using Moq; +using Microsoft.Spark.CSharp.Network; namespace AdapterTest { @@ -65,10 +66,10 @@ namespace AdapterTest [Test] public void TestShow() { - mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny())).Returns("Show"); + mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny(), It.IsAny())).Returns("Show"); var dataFrame = new DataFrame(mockDataFrameProxy.Object, null); dataFrame.Show(); - mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once); + mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once); } [Test] @@ -135,9 +136,9 @@ namespace AdapterTest var expectedRows = new Row[] {new MockRow(), new MockRow()}; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123,null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); var dataFrame = new DataFrame(mockDataFrameProxy.Object, null); @@ -838,9 +839,9 @@ namespace AdapterTest var expectedRows = new Row[] {new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow()}; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); 
mockDataFrameProxy.Setup(m => m.Limit(It.IsAny())).Returns(mockDataFrameProxy.Object); @@ -868,9 +869,9 @@ namespace AdapterTest var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() }; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); mockDataFrameProxy.Setup(m => m.Limit(It.IsAny())).Returns(mockDataFrameProxy.Object); @@ -892,9 +893,9 @@ namespace AdapterTest var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() }; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); mockDataFrameProxy.Setup(m => m.Limit(It.IsAny())).Returns(mockDataFrameProxy.Object); diff --git a/csharp/AdapterTest/DatasetTest.cs b/csharp/AdapterTest/DatasetTest.cs index 7ee59db..b900041 100644 --- a/csharp/AdapterTest/DatasetTest.cs +++ b/csharp/AdapterTest/DatasetTest.cs @@ -38,12 +38,12 @@ namespace AdapterTest public void TestShow() { Mock mockDataFrameProxy = new Mock(); - mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny())).Returns("Show"); + mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny(), It.IsAny())).Returns("Show"); mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); var dataset = new Dataset(mockDatasetProxy.Object); dataset.Show(); - mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once); + mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once); } [Test] diff --git a/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs b/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs index a68d408..60e84fb 100644 --- a/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs +++ b/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs @@ -9,6 +9,7 @@ using System.Threading.Tasks; using System.Net; using System.Net.Sockets; using System.IO; +using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Sql; using Razorvine.Pickle; using Microsoft.Spark.CSharp.Proxy; @@ -64,7 +65,7 @@ namespace AdapterTest.Mocks throw new NotImplementedException(); } - public string GetShowString(int numberOfRows, bool truncate) + public string GetShowString(int numberOfRows, int truncate, bool vertical) { throw new NotImplementedException(); } @@ -240,7 +241,12 @@ namespace AdapterTest.Mocks throw new NotImplementedException(); } - public IDataFrameWriterProxy Write() + public IDataFrameProxy Broadcast() + { + throw new NotImplementedException(); + } + + public IDataFrameWriterProxy Write() { throw new NotImplementedException(); } diff --git a/csharp/AdapterTest/Mocks/MockRDDCollector.cs 
b/csharp/AdapterTest/Mocks/MockRDDCollector.cs index 2ec5c62..e9c8c5c 100644 --- a/csharp/AdapterTest/Mocks/MockRDDCollector.cs +++ b/csharp/AdapterTest/Mocks/MockRDDCollector.cs @@ -4,12 +4,13 @@ using System.Linq; using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Network; namespace AdapterTest.Mocks { class MockRDDCollector : IRDDCollector { - public IEnumerable Collect(int port, SerializedMode serializedMode, Type type) + public IEnumerable Collect(SocketInfo port, SerializedMode serializedMode, Type type) { throw new NotImplementedException(); } diff --git a/csharp/AdapterTest/Mocks/MockRddProxy.cs b/csharp/AdapterTest/Mocks/MockRddProxy.cs index 03b0142..9188ea4 100644 --- a/csharp/AdapterTest/Mocks/MockRddProxy.cs +++ b/csharp/AdapterTest/Mocks/MockRddProxy.cs @@ -15,6 +15,7 @@ using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Proxy; using Microsoft.Spark.CSharp.Interop.Ipc; using NUnit.Framework; +using Microsoft.Spark.CSharp.Network; namespace AdapterTest.Mocks { @@ -60,7 +61,7 @@ namespace AdapterTest.Mocks return union; } - public int CollectAndServe() + public SocketInfo CollectAndServe() { return MockSparkContextProxy.RunJob(this); } diff --git a/csharp/AdapterTest/Mocks/MockRow.cs b/csharp/AdapterTest/Mocks/MockRow.cs index bfa5b73..a6a9a86 100644 --- a/csharp/AdapterTest/Mocks/MockRow.cs +++ b/csharp/AdapterTest/Mocks/MockRow.cs @@ -8,6 +8,13 @@ namespace AdapterTest.Mocks { public class MockRow : Row { + public override dynamic[] Values + { + get + { + throw new NotImplementedException(); + } + } public override int Size() { diff --git a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs index 609e591..da8b853 100644 --- a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs +++ b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs @@ -195,7 +195,7 @@ namespace AdapterTest.Mocks throw new NotImplementedException(); } - internal static int RunJob(IRDDProxy rdd) + internal static SocketInfo RunJob(IRDDProxy rdd) { var mockRdd = (rdd as MockRddProxy); IEnumerable result = mockRdd.pickle ? 
mockRdd.result.Cast() : @@ -222,10 +222,12 @@ namespace AdapterTest.Mocks ns.Flush(); } }); - return (listener.LocalEndPoint as IPEndPoint).Port; + + SocketInfo socketInfo = new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null); + return socketInfo; } - public int RunJob(IRDDProxy rdd, IEnumerable partitions) + public SocketInfo RunJob(IRDDProxy rdd, IEnumerable partitions) { return RunJob(rdd); } diff --git a/csharp/AdapterTest/SocketWrapperTest.cs b/csharp/AdapterTest/SocketWrapperTest.cs index 3c7fac3..63c2ef8 100644 --- a/csharp/AdapterTest/SocketWrapperTest.cs +++ b/csharp/AdapterTest/SocketWrapperTest.cs @@ -86,9 +86,9 @@ namespace AdapterTest Assert.Throws(() => clientSock.GetStream()); Assert.Throws(() => clientSock.Receive()); Assert.Throws(() => clientSock.Send(null)); - Assert.Throws(() => clientSock.Connect(IPAddress.Any, 1024)); + Assert.Throws(() => clientSock.Connect(IPAddress.Any, 1024, null)); - clientSock.Connect(IPAddress.Loopback, port); + clientSock.Connect(IPAddress.Loopback, port, null); // Valid invalid operation var byteBuf = ByteBufPool.Default.Allocate(); diff --git a/csharp/AdapterTest/TestWithMoqDemo.cs b/csharp/AdapterTest/TestWithMoqDemo.cs index 337794b..706413c 100644 --- a/csharp/AdapterTest/TestWithMoqDemo.cs +++ b/csharp/AdapterTest/TestWithMoqDemo.cs @@ -80,7 +80,7 @@ namespace AdapterTest ns.Flush(); } }); - return (listener.LocalEndPoint as IPEndPoint).Port; + return new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null); }); _mockRddProxy.Setup(m => m.RDDCollector).Returns(new RDDCollector()); diff --git a/csharp/AdapterTest/packages.config b/csharp/AdapterTest/packages.config index c3a926b..c7cc11e 100644 --- a/csharp/AdapterTest/packages.config +++ b/csharp/AdapterTest/packages.config @@ -1,10 +1,11 @@  + - + + - \ No newline at end of file diff --git a/csharp/Repl/Repl.csproj b/csharp/Repl/Repl.csproj index 35d8bd6..faf98f4 100644 --- a/csharp/Repl/Repl.csproj +++ b/csharp/Repl/Repl.csproj @@ -34,6 +34,9 @@ false + + ..\packages\log4net.2.0.8\lib\net45-full\log4net.dll + False ..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.dll @@ -50,11 +53,13 @@ False ..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.Scripting.dll + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll + - False ..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll - + ..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll diff --git a/csharp/Repl/packages.config b/csharp/Repl/packages.config index 76ea838..7c1ac61 100644 --- a/csharp/Repl/packages.config +++ b/csharp/Repl/packages.config @@ -1,8 +1,8 @@  - + - - - + + + \ No newline at end of file diff --git a/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs index 5f4e5b4..cb6bac8 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs @@ -1867,5 +1867,72 @@ namespace Microsoft.Spark.CSharp.Samples SparkCLRSamples.FileSystemHelper.DeleteDirectory(path, true); Console.WriteLine("Remove directory: {0}", path); } + + /// + /// Single UDF Sample + /// + [Sample] + internal static void SingleUDFSample() + { + var sqlContext = GetSqlContext(); + var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson)); + peopleDataFrame.RegisterTempTable("peopleDataFrame"); + + sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; 
}); + + var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, 20) > 60"); + + rowSet.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(rowSet.Count() ,2); + } + } + + /// + /// Single UDF Sample with duplicate values + /// + [Sample] + internal static void SingleUDFWithDupSample() + { + var sqlContext = GetSqlContext(); + var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson)); + peopleDataFrame.RegisterTempTable("peopleDataFrame"); + + sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; }); + + var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, age) < 50"); + + rowSet.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(rowSet.Count(), 1); + } + } + + /// + /// Multiple UDFs sample + /// + [Sample] + internal static void MultipleUDFSample() + { + var sqlContext = GetSqlContext(); + var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson)); + peopleDataFrame.RegisterTempTable("peopleDataFrame"); + + sqlContext.RegisterFunction("UDF1", (int x, int y) => { return x + y; }); + sqlContext.RegisterFunction("UDF2", (string name, string id) => { return name + ":" + id; }); + + var rowSet = sqlContext.Sql("SELECT id, name, UDF1(age, 20) AS UDF1, UDF2(name, id) AS UDF2 FROM peopleDataFrame where UDF1(age, 20) > 60"); + + rowSet.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(rowSet.Count(), 2); + } + } } } diff --git a/csharp/Samples/Microsoft.Spark.CSharp/Program.cs b/csharp/Samples/Microsoft.Spark.CSharp/Program.cs index 1f25fa2..f9b5af5 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/Program.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/Program.cs @@ -66,8 +66,10 @@ namespace Microsoft.Spark.CSharp.Samples if (Configuration.IsValidationEnabled && !status) { - Environment.Exit(1); + Environment.Exit(2); } + + Environment.Exit(1); } // Creates and returns a context diff --git a/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj b/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj index 880feb2..d28e1d6 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj +++ b/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj @@ -33,9 +33,11 @@ 4 - - ..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll - True + + ..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll + + + ..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll ..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll diff --git a/csharp/Samples/Microsoft.Spark.CSharp/packages.config b/csharp/Samples/Microsoft.Spark.CSharp/packages.config index 4abe7e9..fc5be33 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/packages.config +++ b/csharp/Samples/Microsoft.Spark.CSharp/packages.config @@ -1,5 +1,6 @@  - + + \ No newline at end of file diff --git a/csharp/Tests.Common/Tests.Common.csproj b/csharp/Tests.Common/Tests.Common.csproj index 361031e..a2ca2c9 100644 --- a/csharp/Tests.Common/Tests.Common.csproj +++ b/csharp/Tests.Common/Tests.Common.csproj @@ -36,11 +36,10 @@ 4 - - False - ..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll - + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll + ..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll diff --git a/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileStatus.cs b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileStatus.cs 
new file mode 100644 index 0000000..0222849 --- /dev/null +++ b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileStatus.cs @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Proxy.Ipc; + +namespace Microsoft.Spark.CSharp.Utils.FileSystem +{ + /// + /// See https://hadoop.apache.org/docs/r2.6.1/api/org/apache/hadoop/fs/FileStatus.html + /// + public class HdfsFileStatus + { + public long Length => _status.Value.Length; + public long ModificationTime => _status.Value.Time; + public string Owner => _status.Value.Owner; + public string Path => _status.Value.Path; + public bool IsFile => _status.Value.IsFile; + public bool IsDirectory => _status.Value.IsDirectory; + public bool IsSymlink => _status.Value.IsSymlink; + + private Lazy _status; + + internal HdfsFileStatus(JvmObjectReference obj) + { + _status = new Lazy(()=>new Status(obj)); + } + + private class Status + { + public long Length; + public long Time; + public string Owner; + public string Path; + public bool IsFile; + public bool IsDirectory; + public bool IsSymlink; + + public Status(JvmObjectReference obj) + { + Length = (long) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getLen"); + Time = (long)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getModificationTime"); + Owner = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getOwner"); + IsFile = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isFile"); + IsDirectory = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isDirectory"); + IsSymlink = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isSymlink"); + var pr = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getPath")); + Path = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pr, "getName"); + } + } + } +} diff --git a/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs index 52d20c3..c88c93b 100644 --- a/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs +++ b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs @@ -4,8 +4,11 @@ using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; +using System.Linq; +using Microsoft.Spark.CSharp.Interop; using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Proxy.Ipc; +using Microsoft.Spark.CSharp.Utils.FileSystem; namespace Microsoft.Spark.CSharp.Utils { @@ -18,7 +21,7 @@ namespace Microsoft.Spark.CSharp.Utils public HdfsFileSystemHelper() { - var jvmConfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration"); + var jvmConfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration"); jvmHdfsReference = new JvmObjectReference((string) SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.hadoop.fs.FileSystem", "get", jvmConfReference)); } @@ -39,16 +42,25 @@ namespace Microsoft.Spark.CSharp.Utils for (var i = 0; i < statusList.Count; i++) { var subPathJvmReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(statusList[i], "getPath")); - files[i] = 
(string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPathJvmReference, "getName"); + files[i] = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPathJvmReference, "getName"); } return files; } - /// - /// Build a temp file path under '/tmp' path on HDFS. - /// - public string GetTempFileName() + /// + /// List the names of all the files under the given path. + /// + public IEnumerable ListStatus(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return ((List)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "listStatus", pathJvmReference)).Select(r=>new HdfsFileStatus(r)); + } + + /// + /// Build a temp file path under '/tmp' path on HDFS. + /// + public string GetTempFileName() { return "/tmp/" + Guid.NewGuid().ToString("N"); } @@ -91,5 +103,37 @@ namespace Microsoft.Spark.CSharp.Utils var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "delete", pathJvmReference, recursive); } - } + + public bool IsFile(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isFile", pathJvmReference); + } + + public bool IsDirectory(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isDirectory", pathJvmReference); + } + + public bool Touch(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "createNewFile", pathJvmReference); + } + + public void CopyFromLocalFile(string src, string dest) + { + var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", new Uri(src).AbsoluteUri); + var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", dest); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyFromLocalFile", from, to); + } + + public void CopyToLocalFile(string src, string dest) + { + var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", new Uri(dest).AbsoluteUri); + var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", src); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyToLocalFile", from, to); + } + } } diff --git a/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj b/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj index 60657c7..d089d7d 100644 --- a/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj +++ b/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj @@ -40,6 +40,7 @@ + diff --git a/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs b/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs index f73e90b..6fb9e1a 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs @@ -111,7 +111,8 @@ namespace Microsoft.Spark.CSharp bool sparkReuseWorker = false; string envVar = Environment.GetEnvironmentVariable("SPARK_REUSE_WORKER"); // this envVar is set in JVM side - if ((envVar != null) && envVar.Equals("1")) + var secret = 
Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET"); + if ((envVar != null) && envVar.Equals("1")) { sparkReuseWorker = true; } @@ -130,7 +131,7 @@ namespace Microsoft.Spark.CSharp SerDe.Write(s, trId); // write taskRunnerId to JVM side s.Flush(); } - TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker); + TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker, secret); waitingTaskRunners.Add(taskRunner); taskRunnerRegistry[trId] = taskRunner; trId++; diff --git a/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs b/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs index fb88e43..fb39856 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs @@ -3,7 +3,9 @@ using System; using System.IO; +using System.Net; using System.Runtime.CompilerServices; +using System.Text; using System.Threading; using Microsoft.Spark.CSharp.Configuration; using Microsoft.Spark.CSharp.Interop.Ipc; @@ -13,106 +15,116 @@ using Microsoft.Spark.CSharp.Services; [assembly: InternalsVisibleTo("WorkerTest")] namespace Microsoft.Spark.CSharp { - /// - /// TaskRunner is used to run Spark task assigned by JVM side. It uses a TCP socket to - /// communicate with JVM side. This socket may be reused to run multiple Spark tasks. - /// - internal class TaskRunner - { - private static ILoggerService logger; - private static ILoggerService Logger - { - get - { - if (logger != null) return logger; - logger = LoggerServiceFactory.GetLogger(typeof(TaskRunner)); - return logger; - } - } + /// + /// TaskRunner is used to run Spark task assigned by JVM side. It uses a TCP socket to + /// communicate with JVM side. This socket may be reused to run multiple Spark tasks. + /// + internal class TaskRunner + { + private static ILoggerService logger; + private static ILoggerService Logger + { + get + { + if (logger != null) return logger; + logger = LoggerServiceFactory.GetLogger(typeof(TaskRunner)); + return logger; + } + } - private readonly ISocketWrapper socket; // Socket to communicate with JVM - private volatile bool stop; - private readonly bool socketReuse; // whether the socket can be reused to run multiple Spark tasks + private readonly ISocketWrapper socket; // Socket to communicate with JVM + private volatile bool stop; + private readonly bool socketReuse; // whether the socket can be reused to run multiple Spark tasks + private string secret; - /// - /// Task runner Id - /// - public int TaskId { get; private set; } + /// + /// Task runner Id + /// + public int TaskId { get; private set; } - public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse) - { - TaskId = trId; - this.socket = socket; - this.socketReuse = socketReuse; - } + public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse, string secret) + { + TaskId = trId; + this.socket = socket; + this.socketReuse = socketReuse; + this.secret = secret; + } - public void Run() - { - Logger.LogInfo("TaskRunner [{0}] is running ...", TaskId); + public void Run() + { + Logger.LogInfo("TaskRunner [{0}] is running ...", TaskId); - try - { - while (!stop) - { - using (var inputStream = socket.GetInputStream()) - using (var outputStream = socket.GetOutputStream()) - { - byte[] bytes = SerDe.ReadBytes(inputStream, sizeof(int)); - if (bytes != null) - { - int splitIndex = SerDe.ToInt(bytes); - bool readComplete = Worker.ProcessStream(inputStream, outputStream, splitIndex); - outputStream.Flush(); - if (!readComplete) // if the socket is not read through 
completely, then it can't be reused - { - stop = true; - // wait for server to complete, otherwise server may get 'connection reset' exception - Logger.LogInfo("Sleep 500 millisecond to close socket ..."); - Thread.Sleep(500); - } - else if (!socketReuse) - { - stop = true; - // wait for server to complete, otherwise server gets 'connection reset' exception - // Use SerDe.ReadBytes() to detect java side has closed socket properly - // ReadBytes() will block until the socket is closed - Logger.LogInfo("waiting JVM side to close socket..."); - SerDe.ReadBytes(inputStream); - Logger.LogInfo("JVM side has closed socket"); - } - } - else - { - stop = true; - Logger.LogWarn("read null splitIndex, socket is closed by JVM"); - } - } - } - } - catch (Exception e) - { - stop = true; - Logger.LogError("TaskRunner [{0}] exeption, will dispose this TaskRunner", TaskId); - Logger.LogException(e); - } - finally - { - try - { - socket.Close(); - } - catch (Exception ex) - { - Logger.LogWarn("close socket exception: {0}", ex); - } - Logger.LogInfo("TaskRunner [{0}] finished", TaskId); - } - } + try + { + while (!stop) + { + using (var inputStream = socket.GetInputStream()) + using (var outputStream = socket.GetOutputStream()) + { + if (!string.IsNullOrEmpty(secret)) + { + SerDe.Write(outputStream, secret); + outputStream.Flush(); + var reply = SerDe.ReadString(inputStream); + Logger.LogDebug("Connect back to JVM: " + reply); + secret = null; + } + byte[] bytes = SerDe.ReadBytes(inputStream, sizeof(int)); + if (bytes != null) + { + int splitIndex = SerDe.ToInt(bytes); + bool readComplete = Worker.ProcessStream(inputStream, outputStream, splitIndex); + outputStream.Flush(); + if (!readComplete) // if the socket is not read through completely, then it can't be reused + { + stop = true; + // wait for server to complete, otherwise server may get 'connection reset' exception + Logger.LogInfo("Sleep 500 millisecond to close socket ..."); + Thread.Sleep(500); + } + else if (!socketReuse) + { + stop = true; + // wait for server to complete, otherwise server gets 'connection reset' exception + // Use SerDe.ReadBytes() to detect java side has closed socket properly + // ReadBytes() will block until the socket is closed + Logger.LogInfo("waiting JVM side to close socket..."); + SerDe.ReadBytes(inputStream); + Logger.LogInfo("JVM side has closed socket"); + } + } + else + { + stop = true; + Logger.LogWarn("read null splitIndex, socket is closed by JVM"); + } + } + } + } + catch (Exception e) + { + stop = true; + Logger.LogError("TaskRunner [{0}] exeption, will dispose this TaskRunner", TaskId); + Logger.LogException(e); + } + finally + { + try + { + socket.Close(); + } + catch (Exception ex) + { + Logger.LogWarn("close socket exception: {0}", ex); + } + Logger.LogInfo("TaskRunner [{0}] finished", TaskId); + } + } - public void Stop() - { - Logger.LogInfo("try to stop TaskRunner [{0}]", TaskId); - stop = true; - } - } + public void Stop() + { + Logger.LogInfo("try to stop TaskRunner [{0}]", TaskId); + stop = true; + } + } } diff --git a/csharp/Worker/Microsoft.Spark.CSharp/UDFCommand.cs b/csharp/Worker/Microsoft.Spark.CSharp/UDFCommand.cs new file mode 100644 index 0000000..43cf6b5 --- /dev/null +++ b/csharp/Worker/Microsoft.Spark.CSharp/UDFCommand.cs @@ -0,0 +1,391 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ +using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Services; +using Microsoft.Spark.CSharp.Sql; +using Razorvine.Pickle; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Runtime.Serialization; +using System.Runtime.Serialization.Formatters.Binary; + +namespace Microsoft.Spark.CSharp +{ + /// + /// This class execute user defined methods. + /// + + internal class UDFCommand + { + private readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); + private ILoggerService logger; + private Stream inputStream; + private Stream outputStream; + private int splitIndex; + private DateTime bootTime; + private string deserializerMode; + private string serializerMode; + private IFormatter formatter; + private Stopwatch commandProcessWatch; + private int isSqlUdf; + private List workerFuncList; + private int stageId; + + public UDFCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime, + string deserializerMode, string serializerMode, IFormatter formatter, + Stopwatch commandProcessWatch, int isSqlUdf, List workerFuncList, int stageId) + { + this.inputStream = inputStream; + this.outputStream = outputStream; + this.splitIndex = splitIndex; + this.bootTime = bootTime; + this.deserializerMode = deserializerMode; + this.serializerMode = serializerMode; + this.formatter = formatter; + this.commandProcessWatch = commandProcessWatch; + this.isSqlUdf = isSqlUdf; + this.workerFuncList = workerFuncList; + this.stageId = stageId; + + InitializeLogger(); + } + + private void InitializeLogger() + { + try + { + // if there exists exe.config file, then use log4net + if (File.Exists(AppDomain.CurrentDomain.SetupInformation.ConfigurationFile)) + { + LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); + } + + logger = LoggerServiceFactory.GetLogger(typeof(UDFCommand)); + } + catch (Exception e) + { + Console.WriteLine("InitializeLogger exception {0}, will exit", e); + Environment.Exit(-1); + } + } + + internal void Execute() + { + if (isSqlUdf == 0) + { + ExecuteNonSqlUDF(); + } + else + { + ExecuteSqlUDF(); + } + } + + private void ExecuteNonSqlUDF() + { + int count = 0; + int nullMessageCount = 0; + logger.LogDebug("Beginning to execute non sql func"); + WorkerFunc workerFunc = workerFuncList[0]; + var func = workerFunc.CharpWorkerFunc.Func; + + var funcProcessWatch = Stopwatch.StartNew(); + DateTime initTime = DateTime.UtcNow; + foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf))) + { + funcProcessWatch.Stop(); + + if (object.ReferenceEquals(null, message)) + { + nullMessageCount++; + continue; + } + + try + { + WriteOutput(outputStream, serializerMode, message, formatter); + } + catch (Exception ex) + { + logger.LogError("WriteOutput() failed at iteration {0}, execption {1}", count, ex); + throw; + } + + count++; + funcProcessWatch.Start(); + } + + logger.LogInfo("Output entries count: " + count); + logger.LogDebug("Null messages count: " + nullMessageCount); + + WriteDiagnosticsInfo(outputStream, bootTime, initTime); + + commandProcessWatch.Stop(); + + // log statistics + logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds); + logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds); + } + + private void ExecuteSqlUDF() + { + int count = 0; + int 
nullMessageCount = 0; + logger.LogDebug("Beginning to execute sql func"); + + var funcProcessWatch = Stopwatch.StartNew(); + DateTime initTime = DateTime.UtcNow; + + foreach (var row in GetIterator(inputStream, deserializerMode, isSqlUdf)) + { + List messages = new List(); + + foreach (WorkerFunc workerFunc in workerFuncList) + { + List args = new List(); + foreach (int offset in workerFunc.ArgOffsets) + { + args.Add(row[offset]); + } + + foreach (var message in workerFunc.CharpWorkerFunc.Func(splitIndex, new[] { args.ToArray()})) + { + funcProcessWatch.Stop(); + + if (object.ReferenceEquals(null, message)) + { + nullMessageCount++; + continue; + } + + messages.Add(message); + } + } + + try + { + dynamic res = messages.ToArray(); + if (messages.Count == 1) + { + res = messages[0]; + } + + WriteOutput(outputStream, serializerMode, res, formatter); + } + catch (Exception ex) + { + logger.LogError("WriteOutput() failed at iteration {0}, exception error {1}", count, ex.Message); + throw; + } + + count++; + funcProcessWatch.Start(); + } + + logger.LogInfo("Output entries count: " + count); + logger.LogDebug("Null messages count: " + nullMessageCount); + + WriteDiagnosticsInfo(outputStream, bootTime, initTime); + + commandProcessWatch.Stop(); + + // log statistics + logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds); + logger.LogInfo("stage {0}, command process time: {0}", stageId, commandProcessWatch.ElapsedMilliseconds); + } + + private void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter) + { + var buffer = GetSerializedMessage(serializerMode, message, formatter); + if (buffer == null) + { + logger.LogError("Buffer is null"); + } + + if (buffer.Length <= 0) + { + logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length); + } + + SerDe.Write(networkStream, buffer.Length); + SerDe.Write(networkStream, buffer); + } + + private byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter) + { + byte[] buffer; + + switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode)) + { + case SerializedMode.None: + buffer = message as byte[]; + break; + + case SerializedMode.String: + buffer = SerDe.ToBytes(message as string); + break; + + case SerializedMode.Row: + var pickler = new Pickler(); + buffer = pickler.dumps(new ArrayList { message }); + break; + + default: + try + { + var ms = new MemoryStream(); + formatter.Serialize(ms, message); + buffer = ms.ToArray(); + } + catch (Exception ex) + { + logger.LogError("Exception serializing output: " + ex); + logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName); + throw; + } + break; + } + + return buffer; + } + + private void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime) + { + DateTime finishTime = DateTime.UtcNow; + const string format = "MM/dd/yyyy hh:mm:ss.fff tt"; + + logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}", + bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format)); + + SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA); + SerDe.Write(networkStream, ToUnixTime(bootTime)); + SerDe.Write(networkStream, ToUnixTime(initTime)); + SerDe.Write(networkStream, ToUnixTime(finishTime)); + + SerDe.Write(networkStream, 0L); //shuffle.MemoryBytesSpilled + SerDe.Write(networkStream, 0L); //shuffle.DiskBytesSpilled + } + + private long ToUnixTime(DateTime dt) + { + return (long)(dt - UnixTimeEpoch).TotalMilliseconds; + } 
+ + private IEnumerable<dynamic> GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf) + { + logger.LogInfo("Serialized mode in GetIterator: " + serializedMode); + IFormatter formatter = new BinaryFormatter(); + var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode); + int messageLength; + Stopwatch watch = Stopwatch.StartNew(); + Row tempRow = null; + + while ((messageLength = SerDe.ReadInt(inputStream)) != (int)SpecialLengths.END_OF_DATA_SECTION) + { + watch.Stop(); + if (messageLength > 0 || messageLength == (int)SpecialLengths.NULL) + { + watch.Start(); + byte[] buffer = messageLength > 0 ? SerDe.ReadBytes(inputStream, messageLength) : null; + watch.Stop(); + switch (mode) + { + case SerializedMode.String: + { + if (messageLength > 0) + { + if (buffer == null) + { + logger.LogDebug("Buffer is null. Message length is {0}", messageLength); + } + yield return SerDe.ToString(buffer); + } + else + { + yield return null; + } + break; + } + + case SerializedMode.Row: + { + Debug.Assert(messageLength > 0); + var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer); + + if (isFuncSqlUdf == 0) + { + foreach (var row in unpickledObjects.Select(item => (item as RowConstructor).GetRow())) + { + yield return row; + } + } + else + { + foreach (var row in unpickledObjects) + { + yield return row; + } + } + + break; + } + + case SerializedMode.Pair: + { + byte[] pairKey = buffer; + byte[] pairValue; + + watch.Start(); + int valueLength = SerDe.ReadInt(inputStream); + if (valueLength > 0) + { + pairValue = SerDe.ReadBytes(inputStream, valueLength); + } + else if (valueLength == (int)SpecialLengths.NULL) + { + pairValue = null; + } + else + { + throw new Exception(string.Format("unexpected valueLength: {0}", valueLength)); + } + watch.Stop(); + + yield return new Tuple<byte[], byte[]>(pairKey, pairValue); + break; + } + + case SerializedMode.None: //just return raw bytes + { + yield return buffer; + break; + } + + default: + { + if (buffer != null) + { + var ms = new MemoryStream(buffer); + yield return formatter.Deserialize(ms); + } + else + { + yield return null; + } + break; + } + } + } + watch.Start(); + } + + logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds); + } + } +} diff --git a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs index 486a1bc..c034ca6 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs @@ -2,7 +2,6 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System; -using System.Collections; using System.Collections.Concurrent; using System.Collections.Generic; using System.IO; @@ -17,8 +16,6 @@ using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Network; using Microsoft.Spark.CSharp.Services; -using Microsoft.Spark.CSharp.Sql; -using Razorvine.Pickle; namespace Microsoft.Spark.CSharp { @@ -31,7 +28,6 @@ namespace Microsoft.Spark.CSharp /// public class Worker { - private static readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); private static ILoggerService logger; private static SparkCLRAssemblyHandler assemblyHandler; @@ -81,11 +77,13 @@ namespace Microsoft.Spark.CSharp InitializeLogger(); logger.LogInfo("RunSimpleWorker ..."); PrintFiles(); - - int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM - logger.LogDebug("Port number used to pipe in/out data between JVM and CLR {0}", javaPort); + //int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM + var javaPort = int.Parse(Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT")); + var secret = Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET"); + logger.LogDebug("Port and secret number used to pipe in/out data between JVM and CLR {0} {1}", javaPort, secret); var socket = InitializeSocket(javaPort); - TaskRunner taskRunner = new TaskRunner(0, socket, false); + //Microsoft.Spark.CSharp.Network.Utils.DoServerAuth(socket, secret); + TaskRunner taskRunner = new TaskRunner(0, socket, false, secret); taskRunner.Run(); } catch (Exception e) @@ -119,7 +117,7 @@ namespace Microsoft.Spark.CSharp private static ISocketWrapper InitializeSocket(int javaPort) { var socket = SocketFactory.CreateSocket(); - socket.Connect(IPAddress.Loopback, javaPort); + socket.Connect(IPAddress.Loopback, javaPort, null); return socket; } @@ -138,9 +136,13 @@ namespace Microsoft.Spark.CSharp //// initialize global state //shuffle.MemoryBytesSpilled = 0 //shuffle.DiskBytesSpilled = 0 + SerDe.ReadInt(inputStream); + SerDe.ReadInt(inputStream); + SerDe.ReadInt(inputStream); + SerDe.ReadLong(inputStream); - // fetch name of workdir - string sparkFilesDir = SerDe.ReadString(inputStream); + // fetch name of workdir + string sparkFilesDir = SerDe.ReadString(inputStream); logger.LogDebug("spark_files_dir: " + sparkFilesDir); //SparkFiles._root_directory = sparkFilesDir //SparkFiles._is_running_on_worker = True @@ -149,7 +151,7 @@ namespace Microsoft.Spark.CSharp ProcessBroadcastVariables(inputStream); - Accumulator.threadLocalAccumulatorRegistry = new Dictionary(); + Accumulator.threadLocalAccumulatorRegistry = new Dictionary(); var formatter = ProcessCommand(inputStream, outputStream, splitIndex, bootTime); @@ -255,96 +257,119 @@ namespace Microsoft.Spark.CSharp logger.LogDebug("Is func Sql UDF = {0}", isSqlUdf); IFormatter formatter = new BinaryFormatter(); + UDFCommand command = null; if (isSqlUdf == 0) { - logger.LogDebug("Processing non-UDF command"); - int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); - logger.LogDebug("Command length: " + lengthOfCommandByteArray); - - if (lengthOfCommandByteArray > 0) - { - var commandProcessWatch = new Stopwatch(); - commandProcessWatch.Start(); - - int stageId; - string deserializerMode; - string serializerMode; - CSharpWorkerFunc workerFunc; - ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, - out workerFunc); - - ExecuteCommand(inputStream, outputStream, 
splitIndex, bootTime, deserializerMode, workerFunc, serializerMode, - formatter, commandProcessWatch, stageId, isSqlUdf); - } - else - { - logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-("); - } + command = ProcessNonUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf); } else { - logger.LogDebug("Processing UDF command"); - var udfCount = SerDe.ReadInt(inputStream); - logger.LogDebug("Count of UDFs = {0}", udfCount); + command = ProcessUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf); + } - if (udfCount == 1) - { - CSharpWorkerFunc func = null; - var argCount = SerDe.ReadInt(inputStream); - logger.LogDebug("Count of args = {0}", argCount); - - var argOffsets = new List<int>(); - - for (int argIndex = 0; argIndex < argCount; argIndex++) - { - var offset = SerDe.ReadInt(inputStream); - logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset); - argOffsets.Add(offset); - } - var chainedFuncCount = SerDe.ReadInt(inputStream); - logger.LogDebug("Count of chained func = {0}", chainedFuncCount); - - var commandProcessWatch = new Stopwatch(); - int stageId = -1; - string deserializerMode = null; - string serializerMode = null; - for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++) - { - int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); - logger.LogDebug("UDF command length: " + lengthOfCommandByteArray) - ; - - if (lengthOfCommandByteArray > 0) - { - CSharpWorkerFunc workerFunc; - ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, - out workerFunc); - - func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc); - } - else - { - logger.LogWarn("UDF lengthOfCommandByteArray = 0. Nothing to execute :-("); - } - } - - Debug.Assert(stageId != -1); - Debug.Assert(deserializerMode != null); - Debug.Assert(serializerMode != null); - Debug.Assert(func != null); - ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, func, serializerMode, formatter, - commandProcessWatch, stageId, isSqlUdf); - } - else - { - throw new NotSupportedException(); //TODO - add support for multiple UDFs - } + if (command != null) + { + command.Execute(); } return formatter; } + private static UDFCommand ProcessNonUdfCommand(Stream inputStream, Stream outputStream, int splitIndex, + DateTime bootTime, IFormatter formatter, int isSqlUdf) + { + logger.LogDebug("Processing non-UDF command"); + int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); + logger.LogDebug("Command length: " + lengthOfCommandByteArray); + + UDFCommand command = null; + if (lengthOfCommandByteArray > 0) + { + var commandProcessWatch = new Stopwatch(); + commandProcessWatch.Start(); + + int stageId; + string deserializerMode; + string serializerMode; + CSharpWorkerFunc cSharpWorkerFunc; + ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, + out cSharpWorkerFunc); + + command = new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, + serializerMode, formatter, commandProcessWatch, isSqlUdf, + new List<WorkerFunc>() { new WorkerFunc(cSharpWorkerFunc, 0, null) }, stageId); + + } + else + { + logger.LogWarn("lengthOfCommandByteArray = 0.
Nothing to execute :-("); + } + + return command; + } + + private static UDFCommand ProcessUdfCommand(Stream inputStream, Stream outputStream, int splitIndex, + DateTime bootTime, IFormatter formatter, int isSqlUdf) + { + logger.LogDebug("Processing UDF command"); + var udfCount = SerDe.ReadInt(inputStream); + logger.LogDebug("Count of UDFs = {0}", udfCount); + + int stageId = -1; + string deserializerMode = null; + string serializerMode = null; + var commandProcessWatch = new Stopwatch(); + List<WorkerFunc> workerFuncList = new List<WorkerFunc>(); + + for(int udfIter = 0; udfIter < udfCount; udfIter++) + { + CSharpWorkerFunc func = null; + var argCount = SerDe.ReadInt(inputStream); + logger.LogDebug("Count of args = {0}", argCount); + + List<int> argOffsets = new List<int>(); + for (int argIndex = 0; argIndex < argCount; argIndex++) + { + var offset = SerDe.ReadInt(inputStream); + logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset); + argOffsets.Add(offset); + } + + var chainedFuncCount = SerDe.ReadInt(inputStream); + logger.LogDebug("Count of chained func = {0}", chainedFuncCount); + + for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++) + { + int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); + logger.LogDebug("UDF command length: " + lengthOfCommandByteArray); + + if (lengthOfCommandByteArray > 0) + { + CSharpWorkerFunc workerFunc; + ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, + out workerFunc); + + func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc); + } + else + { + logger.LogWarn("UDF lengthOfCommandByteArray = 0. Nothing to execute :-("); + } + } + + Debug.Assert(stageId != -1); + Debug.Assert(deserializerMode != null); + Debug.Assert(serializerMode != null); + Debug.Assert(func != null); + + workerFuncList.Add(new WorkerFunc(func, argCount, argOffsets)); + } + + return new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, + serializerMode, formatter, commandProcessWatch, isSqlUdf, workerFuncList, stageId); + } + private static void ReadCommand(Stream networkStream, IFormatter formatter, out int stageId, out string deserializerMode, out string serializerMode, out CSharpWorkerFunc workerFunc) @@ -388,116 +413,7 @@ namespace Microsoft.Spark.CSharp "--------------------------------------------------------------------------------------------------------------"); logger.LogDebug(sb.ToString()); } - - private static void ExecuteCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime, - string deserializerMode, CSharpWorkerFunc workerFunc, string serializerMode, - IFormatter formatter, Stopwatch commandProcessWatch, int stageId, int isSqlUdf) - { - int count = 0; - int nullMessageCount = 0; - logger.LogDebug("Beginning to execute func"); - var func = workerFunc.Func; - - var funcProcessWatch = Stopwatch.StartNew(); - DateTime initTime = DateTime.UtcNow; - foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf))) - { - funcProcessWatch.Stop(); - - if (object.ReferenceEquals(null, message)) - { - nullMessageCount++; - continue; - } - - try - { - WriteOutput(outputStream, serializerMode, message, formatter); - } - catch (Exception) - { - logger.LogError("WriteOutput() failed at iteration {0}", count); - throw; - } - - count++; - funcProcessWatch.Start(); - } - - logger.LogInfo("Output entries count: " + count); - logger.LogDebug("Null messages count: " + nullMessageCount); - - //if profiler: - // profiler.profile(process) - 
//else: - // process() - - WriteDiagnosticsInfo(outputStream, bootTime, initTime); - - commandProcessWatch.Stop(); - - // log statistics - logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds); - logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds); - } - - private static void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter) - { - var buffer = GetSerializedMessage(serializerMode, message, formatter); - if (buffer == null) - { - logger.LogError("Buffer is null"); - } - - if (buffer.Length <= 0) - { - logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length); - } - - //Debug.Assert(buffer != null); - //Debug.Assert(buffer.Length > 0); - SerDe.Write(networkStream, buffer.Length); - SerDe.Write(networkStream, buffer); - } - - private static byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter) - { - byte[] buffer; - - switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode)) - { - case SerializedMode.None: - buffer = message as byte[]; - break; - - case SerializedMode.String: - buffer = SerDe.ToBytes(message as string); - break; - - case SerializedMode.Row: - var pickler = new Pickler(); - buffer = pickler.dumps(new ArrayList { message }); - break; - - default: - try - { - var ms = new MemoryStream(); - formatter.Serialize(ms, message); - buffer = ms.ToArray(); - } - catch (Exception) - { - logger.LogError("Exception serializing output"); - logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName); - throw; - } - break; - } - - return buffer; - } - + private static int ReadDiagnosticsInfo(Stream networkStream) { int rddId = SerDe.ReadInt(networkStream); @@ -505,22 +421,7 @@ namespace Microsoft.Spark.CSharp int partitionId = SerDe.ReadInt(networkStream); logger.LogInfo("rddInfo: rddId {0}, stageId {1}, partitionId {2}", rddId, stageId, partitionId); return stageId; - } - - private static void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime) - { - DateTime finishTime = DateTime.UtcNow; - const string format = "MM/dd/yyyy hh:mm:ss.fff tt"; - logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}", - bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format)); - SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA); - SerDe.Write(networkStream, ToUnixTime(bootTime)); - SerDe.Write(networkStream, ToUnixTime(initTime)); - SerDe.Write(networkStream, ToUnixTime(finishTime)); - - SerDe.Write(networkStream, 0L); //shuffle.MemoryBytesSpilled - SerDe.Write(networkStream, 0L); //shuffle.DiskBytesSpilled - } + } private static void WriteAccumulatorValues(Stream networkStream, IFormatter formatter) { @@ -564,121 +465,7 @@ namespace Microsoft.Spark.CSharp logger.LogDebug("Files available in executor"); logger.LogDebug("Location: {0}{1}{2}", folder, Environment.NewLine, outfiles.ToString()); - } - - private static long ToUnixTime(DateTime dt) - { - return (long)(dt - UnixTimeEpoch).TotalMilliseconds; - } - - private static IEnumerable GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf) - { - logger.LogInfo("Serialized mode in GetIterator: " + serializedMode); - IFormatter formatter = new BinaryFormatter(); - var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode); - int messageLength; - Stopwatch watch = Stopwatch.StartNew(); - while ((messageLength = SerDe.ReadInt(inputStream)) != 
(int)SpecialLengths.END_OF_DATA_SECTION) - { - watch.Stop(); - if (messageLength > 0 || messageLength == (int)SpecialLengths.NULL) - { - watch.Start(); - byte[] buffer = messageLength > 0 ? SerDe.ReadBytes(inputStream, messageLength) : null; - watch.Stop(); - switch (mode) - { - case SerializedMode.String: - { - if (messageLength > 0) - { - if (buffer == null) - { - logger.LogDebug("Buffer is null. Message length is {0}", messageLength); - } - yield return SerDe.ToString(buffer); - } - else - { - yield return null; - } - break; - } - - case SerializedMode.Row: - { - Debug.Assert(messageLength > 0); - var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer); - - if (isFuncSqlUdf == 0) - { - foreach (var row in unpickledObjects.Select(item => (item as RowConstructor).GetRow())) - { - yield return row; - } - } - else - { - foreach (var row in unpickledObjects) - { - yield return row; - } - } - - break; - } - - case SerializedMode.Pair: - { - byte[] pairKey = buffer; - byte[] pairValue; - - watch.Start(); - int valueLength = SerDe.ReadInt(inputStream); - if (valueLength > 0) - { - pairValue = SerDe.ReadBytes(inputStream, valueLength); - } - else if (valueLength == (int)SpecialLengths.NULL) - { - pairValue = null; - } - else - { - throw new Exception(string.Format("unexpected valueLength: {0}", valueLength)); - } - watch.Stop(); - - yield return new Tuple(pairKey, pairValue); - break; - } - - case SerializedMode.None: //just return raw bytes - { - yield return buffer; - break; - } - - default: - { - if (buffer != null) - { - var ms = new MemoryStream(buffer); - yield return formatter.Deserialize(ms); - } - else - { - yield return null; - } - break; - } - } - } - watch.Start(); - } - - logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds); - } + } internal class SparkCLRAssemblyHandler { diff --git a/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj b/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj index 36c9c1f..2ba4552 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj +++ b/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj @@ -46,6 +46,8 @@ + + diff --git a/csharp/Worker/Microsoft.Spark.CSharp/WorkerFunc.cs b/csharp/Worker/Microsoft.Spark.CSharp/WorkerFunc.cs new file mode 100644 index 0000000..0c6a638 --- /dev/null +++ b/csharp/Worker/Microsoft.Spark.CSharp/WorkerFunc.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ +using System.Runtime.Serialization; +using Microsoft.Spark.CSharp.Core; +using System.Collections.Generic; + +namespace Microsoft.Spark.CSharp +{ + internal class WorkerFunc + { + internal CSharpWorkerFunc CharpWorkerFunc { get; } + + internal int ArgsCount { get; } + + internal List<int> ArgOffsets { get; } + + internal WorkerFunc(CSharpWorkerFunc func, int argsCount, List<int> argOffsets) + { + CharpWorkerFunc = func; + ArgsCount = argsCount; + ArgOffsets = argOffsets; + } + } +} diff --git a/csharp/WorkerTest/MultiThreadWorkerTest.cs b/csharp/WorkerTest/MultiThreadWorkerTest.cs index 0f0b307..6488ade 100644 --- a/csharp/WorkerTest/MultiThreadWorkerTest.cs +++ b/csharp/WorkerTest/MultiThreadWorkerTest.cs @@ -81,6 +81,7 @@ namespace WorkerTest worker.Start(); int serverPort = 0; serverPort = SerDe.ReadInt(worker.StandardOutput.BaseStream); + Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", serverPort.ToString()); StreamReader stdoutReader = worker.StandardOutput; Task.Run(() => { @@ -119,7 +120,7 @@ private ISocketWrapper CreateSocket(int serverPort) { var socket =SocketFactory.CreateSocket(); - socket.Connect(IPAddress.Loopback, serverPort); + socket.Connect(IPAddress.Loopback, serverPort, null); return socket; } @@ -131,6 +132,10 @@ { SerDe.Write(s, splitIndex); SerDe.Write(s, ver); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0L); SerDe.Write(s, sparkFilesDir); SerDe.Write(s, numberOfIncludesItems); SerDe.Write(s, numBroadcastVariables); diff --git a/csharp/WorkerTest/WorkerTest.cs b/csharp/WorkerTest/WorkerTest.cs index 1826437..1c0f6ea 100644 --- a/csharp/WorkerTest/WorkerTest.cs +++ b/csharp/WorkerTest/WorkerTest.cs @@ -93,6 +93,7 @@ namespace WorkerTest } }; + Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", port.ToString()); lock (syncLock) { output.Clear(); @@ -125,6 +126,10 @@ { SerDe.Write(s, splitIndex); SerDe.Write(s, ver); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0L); SerDe.Write(s, sparkFilesDir); SerDe.Write(s, numberOfIncludesItems); SerDe.Write(s, numBroadcastVariables); @@ -631,6 +636,10 @@ { SerDe.Write(s, splitIndex); SerDe.Write(s, ver); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0L); SerDe.Write(s, sparkFilesDir); SerDe.Write(s, numberOfIncludesItems); @@ -802,6 +811,10 @@ using (var inputStream = new MemoryStream(500)) { SerDe.Write(inputStream, "1.0"); //version + SerDe.Write(inputStream, 0); + SerDe.Write(inputStream, 0); + SerDe.Write(inputStream, 0); + SerDe.Write(inputStream, 0L); SerDe.Write(inputStream, ""); //includes directory SerDe.Write(inputStream, 0); //number of included items SerDe.Write(inputStream, 0); //number of broadcast variables diff --git a/csharp/WorkerTest/WorkerTest.csproj b/csharp/WorkerTest/WorkerTest.csproj index 76c9ba8..8fa76de 100644 --- a/csharp/WorkerTest/WorkerTest.csproj +++ b/csharp/WorkerTest/WorkerTest.csproj @@ -35,9 +35,8 @@ 4 - - False - ..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll False diff --git a/examples/Batch/WordCount/WordCount.csproj b/examples/Batch/WordCount/WordCount.csproj index b655eb8..1961a0b 100644 --- a/examples/Batch/WordCount/WordCount.csproj +++ b/examples/Batch/WordCount/WordCount.csproj @@ -32,17 +32,17 @@ 4 - + False - 
..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -84,4 +84,4 @@ --> - + \ No newline at end of file diff --git a/examples/Batch/pi/Pi.csproj b/examples/Batch/pi/Pi.csproj index df0916b..464f4b5 100644 --- a/examples/Batch/pi/Pi.csproj +++ b/examples/Batch/pi/Pi.csproj @@ -35,17 +35,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -84,4 +84,4 @@ --> - + \ No newline at end of file diff --git a/examples/Examples.sln b/examples/Examples.sln index 5ba0d23..3eaad7a 100644 --- a/examples/Examples.sln +++ b/examples/Examples.sln @@ -1,6 +1,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 -VisualStudioVersion = 14.0.25123.0 +VisualStudioVersion = 14.0.25420.1 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HdfsWordCount", "Streaming\HdfsWordCount\HdfsWordCount.csproj", "{6A2C7CF9-D64E-490D-9841-269EE14F7932}" EndProject diff --git a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj index 30fd07f..2f38f46 100644 --- a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj +++ b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj @@ -34,14 +34,17 @@ 4 - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -80,4 +83,4 @@ --> - + \ No newline at end of file diff --git a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj index 0040a3e..c826a80 100644 --- a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj +++ b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj @@ -33,17 +33,17 @@ 4 + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe + False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe - True - - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll - True + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll @@ -75,4 +75,4 @@ --> - + \ No newline at end of file diff --git a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj 
b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj index 24ecf84..fb4fc63 100644 --- a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj +++ b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj @@ -34,17 +34,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -82,4 +82,4 @@ --> - + \ No newline at end of file diff --git a/examples/Sql/SparkXml/SparkXml.csproj b/examples/Sql/SparkXml/SparkXml.csproj index d770125..622b6a2 100644 --- a/examples/Sql/SparkXml/SparkXml.csproj +++ b/examples/Sql/SparkXml/SparkXml.csproj @@ -34,17 +34,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -82,4 +82,4 @@ --> - + \ No newline at end of file diff --git a/examples/Streaming/EventHub/EventHub.csproj b/examples/Streaming/EventHub/EventHub.csproj index cc6d4e2..934eae5 100644 --- a/examples/Streaming/EventHub/EventHub.csproj +++ b/examples/Streaming/EventHub/EventHub.csproj @@ -34,16 +34,18 @@ 4 - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -85,4 +87,4 @@ --> - + \ No newline at end of file diff --git a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj index 34facbb..c58ceae 100644 --- a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj +++ b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj @@ -32,22 +32,22 @@ 4 + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe + + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll + + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll + False ..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll - - False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe - - - False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - - - False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll - False ..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll @@ -80,4 +80,4 @@ --> - + \ No newline at end of file diff --git a/examples/Streaming/Kafka/Kafka.csproj b/examples/Streaming/Kafka/Kafka.csproj index 2bdaa81..68b15a7 100644 --- a/examples/Streaming/Kafka/Kafka.csproj +++ 
b/examples/Streaming/Kafka/Kafka.csproj @@ -32,15 +32,17 @@ 4 - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe - - + False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll + + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -79,4 +81,4 @@ --> - + \ No newline at end of file diff --git a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj index e91905a..81f5a19 100644 --- a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj +++ b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj @@ -66,13 +66,13 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll @@ -98,4 +98,4 @@ --> - + \ No newline at end of file diff --git a/examples/fsharp/WordCount/WordCountFSharp.fsproj b/examples/fsharp/WordCount/WordCountFSharp.fsproj index af96e49..86c3bda 100644 --- a/examples/fsharp/WordCount/WordCountFSharp.fsproj +++ b/examples/fsharp/WordCount/WordCountFSharp.fsproj @@ -71,20 +71,17 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe - True + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe ..\..\packages\FSharp.Core.4.0.0.1\lib\net40\FSharp.Core.dll True - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - True + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll - True + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll @@ -110,4 +107,4 @@ --> - + \ No newline at end of file diff --git a/notes/running-mobius-app.md b/notes/running-mobius-app.md index b430a0d..ea776a3 100644 --- a/notes/running-mobius-app.md +++ b/notes/running-mobius-app.md @@ -145,7 +145,7 @@ The following sample commands show how to run Mobius examples in local mode. Usi Computes the _approximate_ value of Pi using two appropaches and displays the value. 
### WordCount Example (Batch) -* Run `sparkclr-submit.cmd --exe SparkClrWordCount.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug ` +* Run `sparkclr-submit.cmd --exe SparkClrPi.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug ` `InputFilePath` should be in one of the following formats: * `hdfs://path/to/inputfile` diff --git a/scala/pom.xml b/scala/pom.xml index cb9ce90..ec526cd 100644 --- a/scala/pom.xml +++ b/scala/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.microsoft.sparkclr spark-clr_2.11 - 2.0.200-SNAPSHOT + 2.3.1-SNAPSHOT Mobius Project C# language binding and extensions to Apache Spark https://github.com/Microsoft/Mobius @@ -35,7 +35,7 @@ 1.5 UTF-8 2.11.8 - 2.0.2 + 2.3.1 2.11 @@ -106,14 +106,19 @@ org.apache.spark spark-hive_2.11 - 2.0.0 + ${spark.version} com.databricks - spark-csv_2.10 - 1.4.0 + spark-csv_2.11 + 1.5.0 + + + com.databricks + spark-avro_2.11 + 4.0.0 diff --git a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala index d48e9f3..57ca361 100644 --- a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala +++ b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala @@ -12,6 +12,7 @@ import java.util.{List => JList, Map => JMap} import org.apache.hadoop.io.compress.CompressionCodec import org.apache.spark.api.python._ +import org.apache.spark.api.python.PythonAccumulatorV2 import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark._ @@ -34,7 +35,7 @@ class CSharpRDD( cSharpWorkerExecutable: String, unUsedVersionIdentifier: String, broadcastVars: JList[Broadcast[PythonBroadcast]], - accumulator: Accumulator[JList[Array[Byte]]]) + accumulator: PythonAccumulatorV2) extends PythonRDD ( parent, SQLUtils.createCSharpFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable, @@ -95,7 +96,7 @@ class CSharpRDD( logInfo("Env vars: " + envVars.asScala.mkString(", ")) val runner = new PythonRunner( - Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuse_worker, false, Array(Array(0))) + Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuseWorker) runner.compute(firstParent.iterator(split, context), split.index, context) } diff --git a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala index c01d76a..79af72c 100644 --- a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala +++ b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala @@ -8,6 +8,7 @@ package org.apache.spark.sql.api.csharp import java.io.{ByteArrayOutputStream, DataOutputStream} import org.apache.spark.{Accumulator, SparkContext} +import org.apache.spark.api.python.PythonAccumulatorV2 import org.apache.spark.api.csharp.SerDe import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.python.{PythonBroadcast, PythonFunction, SerDeUtil} @@ -51,7 +52,7 @@ object SQLUtils { cSharpWorkerExecutable: String, unUsedVersionIdentifier: String, broadcastVars: JList[Broadcast[PythonBroadcast]], - accumulator: Accumulator[JList[Array[Byte]]]) : PythonFunction = { + accumulator: PythonAccumulatorV2) : PythonFunction = { PythonFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable, unUsedVersionIdentifier, broadcastVars, accumulator) } diff --git a/scala/src/main/org/apache/spark/util/csharp/Utils.scala b/scala/src/main/org/apache/spark/util/csharp/Utils.scala index 7bb7419..7294cae 100644 --- a/scala/src/main/org/apache/spark/util/csharp/Utils.scala +++ 
b/scala/src/main/org/apache/spark/util/csharp/Utils.scala @@ -127,17 +127,17 @@ object Utils extends Logging { timer.schedule(new TimerTask() { @Override def run() { - Runtime.getRuntime.halt(status) + if (status!=0) { Runtime.getRuntime.halt(status); } } }, maxDelayMillis) // try to exit nicely - System.exit(status); + if (status!=0) { System.exit(status); } } catch { // exit nastily if we have a problem case ex: Throwable => Runtime.getRuntime.halt(status) } finally { // should never get here - Runtime.getRuntime.halt(status) + if (status!=0) { Runtime.getRuntime.halt(status); } } } @@ -147,7 +147,7 @@ object Utils extends Logging { * @param status the exit status, zero for OK, non-zero for error */ def exit(status: Int): Unit = { - exit(status, 1000) + exit(status, 1000); } private[spark] def listZipFileEntries(file: File): Array[String] = { diff --git a/scripts/sparkclr-submit.cmd b/scripts/sparkclr-submit.cmd index c6e1d50..5f119c8 100644 --- a/scripts/sparkclr-submit.cmd +++ b/scripts/sparkclr-submit.cmd @@ -42,7 +42,7 @@ if not exist "%SPARK_JARS_DIR%" ( set SPARK_JARS_CLASSPATH=%SPARK_JARS_DIR%\* -if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar) +if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar) echo [sparkclr-submit.cmd] SPARKCLR_JAR=%SPARKCLR_JAR% set SPARKCLR_CLASSPATH=%SPARKCLR_HOME%\lib\%SPARKCLR_JAR% REM SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode @@ -105,4 +105,4 @@ goto :eof @echo Example 2: @echo sparkclr-submit.cmd [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe c:\sparkclrapp\driver.zip arg1 arg2 arg3 @echo Example 3: - @echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3 + @echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3 diff --git a/scripts/sparkclr-submit.sh b/scripts/sparkclr-submit.sh index 5d94efa..e4ca34f 100755 --- a/scripts/sparkclr-submit.sh +++ b/scripts/sparkclr-submit.sh @@ -32,7 +32,7 @@ function usage() { echo "Example 2:" echo "sparkclr-submit.sh [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe sparkclrapp/driver.zip arg1 arg2 arg3" echo "Example 3:" - echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.10-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3" + echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3" } [ "$SPARK_HOME" = "" ] && spark_home_error @@ -57,7 +57,7 @@ fi export SPARK_JARS_CLASSPATH="$SPARK_JARS_DIR/*" -export SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar +export SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar export SPARKCLR_CLASSPATH="$SPARKCLR_HOME/lib/$SPARKCLR_JAR" # SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode [ ! 
"$SPARKCLR_DEBUGMODE_EXT_JARS" = "" ] && export SPARKCLR_CLASSPATH="$SPARKCLR_CLASSPATH:$SPARKCLR_DEBUGMODE_EXT_JARS"