Tao Wang 2016-07-09 18:08:17 +08:00
Parents 5ec73d419b e75eb189e0
Commit 470bb411fe
285 changed files with 17783 additions and 2917 deletions

.gitignore (vendored)

@ -30,6 +30,8 @@
scala/dependency-reduced-pom.xml
build/runtime/
build/tools/
build/examples/
build/dependencies/
*.log
lib/


@ -6,9 +6,9 @@ before_install:
- sudo apt-get install xsltproc
- nuget install NUnit.Runners -Version 3.0.0 -OutputDirectory testrunner
# install maven 3.3.3
- wget http://archive.apache.org/dist/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
- tar zxf apache-maven-3.3.3-bin.tar.gz && rm apache-maven-3.3.3-bin.tar.gz
- export M2_HOME="$PWD/apache-maven-3.3.3"
- wget http://archive.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
- tar zxf apache-maven-3.3.9-bin.tar.gz && rm apache-maven-3.3.9-bin.tar.gz
- export M2_HOME="$PWD/apache-maven-3.3.9"
- export M2="$M2_HOME/bin"
- export PATH="$M2:$PATH"
- hash -r

README.md

@ -1,6 +1,7 @@
<h1><img src='/logo/spark-clr-clear-500x200.png' width='200px' alt='SparkCLR logo' /></h1>
<img src='logo/mobius-star-200.png' width='125px' alt='Mobius logo' />
# Mobius: C# API for Spark
[SparkCLR](https://github.com/Microsoft/SparkCLR) (pronounced Sparkler) adds C# language binding to [Apache Spark](https://spark.apache.org/), enabling the implementation of Spark driver code and data processing operations in C#.
[Mobius](https://github.com/Microsoft/Mobius) adds C# language binding to [Apache Spark](https://spark.apache.org/), enabling the implementation of Spark driver code and data processing operations in C#.
For example, the word count sample in Apache Spark can be implemented in C# as follows:
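The word-count listing itself falls outside this diff hunk. As a minimal sketch of what such a program looks like with the Mobius RDD API (the context setup and the input path are assumptions; the method names follow the samples elsewhere in this README):

``` c#
// Illustrative word count (assumes an existing SparkContext named sparkContext)
var lines = sparkContext.TextFile(@"hdfs:///path/to/input.txt");

var wordCounts = lines
    .FlatMap(line => line.Split(' '))                            // split each line into words
    .Map(word => new KeyValuePair<string, int>(word.Trim(), 1))  // pair each word with a count of 1
    .ReduceByKey((x, y) => x + y);                                // sum the counts per word

foreach (var wordCount in wordCounts.Collect())
    Console.WriteLine("{0}: {1}", wordCount.Key, wordCount.Value);
```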
@ -49,64 +50,108 @@ maxLatencyByDcDataFrame.ShowSchema();
maxLatencyByDcDataFrame.Show();
```
Refer to [SparkCLR\csharp\Samples](csharp/Samples) directory and [sample usage](csharp/Samples/Microsoft.Spark.CSharp/samplesusage.md) for complete samples.
A simple Spark Streaming application that processes messages from Kafka using C# may be implemented using the following code:
``` c#
StreamingContext sparkStreamingContext = StreamingContext.GetOrCreate(checkpointPath, () =>
{
var ssc = new StreamingContext(sparkContext, slideDurationInMillis);
ssc.Checkpoint(checkpointPath);
var stream = KafkaUtils.CreateDirectStream(ssc, topicList, kafkaParams, perTopicPartitionKafkaOffsets);
//message format: [timestamp],[loglevel],[logmessage]
var countByLogLevelAndTime = stream
.Map(kvp => Encoding.UTF8.GetString(kvp.Value))
.Filter(line => line.Contains(","))
.Map(line => line.Split(','))
.Map(columns => new KeyValuePair<string, int>(
string.Format("{0},{1}", columns[0], columns[1]), 1))
.ReduceByKeyAndWindow((x, y) => x + y, (x, y) => x - y,
windowDurationInSecs, slideDurationInSecs, 3)
.Map(logLevelCountPair => string.Format("{0},{1}",
logLevelCountPair.Key, logLevelCountPair.Value));
countByLogLevelAndTime.ForeachRDD(countByLogLevel =>
{
foreach (var logCount in countByLogLevel.Collect())
Console.WriteLine(logCount);
});
return ssc;
});
sparkStreamingContext.Start();
sparkStreamingContext.AwaitTermination();
```
Refer to [Mobius\csharp\Samples](csharp/Samples) directory and [sample usage](csharp/Samples/Microsoft.Spark.CSharp/samplesusage.md) for complete samples.
## API Documentation
Refer to [SparkCLR C# API documentation](csharp/Adapter/documentation/SparkCLR_API_Documentation.md) for the list of Spark's data processing operations supported in SparkCLR.
Refer to [Mobius C# API documentation](csharp/Adapter/documentation/Mobius_API_Documentation.md) for the list of Spark's data processing operations supported in Mobius.
## API Usage
SparkCLR API usage samples are available at:
Mobius API usage samples are available at:
* [Samples project](csharp/Samples/Microsoft.Spark.CSharp/) which uses a comprehensive set of SparkCLR APIs to implement samples that are also used for functional validation of APIs
* [Examples folder](./examples) which contains standalone [C# projects](/notes/running-mobius-app.md#running-mobius-examples-in-local-mode) that can be used as templates to start developing Mobius applications
* [Examples folder](./examples) which contains standalone SparkCLR projects that can be used as templates to start developing SparkCLR applications
* Performance test scenarios implemented in [C#](csharp/Perf/Microsoft.Spark.CSharp) and [Scala](scala/perf) for side by side comparison of Spark driver code
* [Samples project](csharp/Samples/Microsoft.Spark.CSharp/) which uses a comprehensive set of Mobius APIs to implement samples that are also used for functional validation of APIs
* Mobius performance test scenarios implemented in [C#](csharp/Perf/Microsoft.Spark.CSharp) and [Scala](scala/perf) for side by side comparison of Spark driver code
## Documents
Refer to the [docs folder](docs) for design overview and other info on SparkCLR
Refer to the [docs folder](docs) for design overview and other info on Mobius
## Build Status
|Ubuntu 14.04.3 LTS |Windows |Unit test coverage |
|-------------------|:------:|:-----------------:|
|[![Build status](https://travis-ci.org/Microsoft/SparkCLR.svg?branch=master)](https://travis-ci.org/Microsoft/SparkCLR) |[![Build status](https://ci.appveyor.com/api/projects/status/lflkua81gg0swv6i/branch/master?svg=true)](https://ci.appveyor.com/project/SparkCLR/sparkclr/branch/master) |[![codecov.io](https://codecov.io/github/Microsoft/SparkCLR/coverage.svg?branch=master)](https://codecov.io/github/Microsoft/SparkCLR?branch=master) |
|[![Build status](https://travis-ci.org/Microsoft/Mobius.svg?branch=master)](https://travis-ci.org/Microsoft/Mobius) |[![Build status](https://ci.appveyor.com/api/projects/status/lflkua81gg0swv6i/branch/master?svg=true)](https://ci.appveyor.com/project/SparkCLR/sparkclr/branch/master) |[![codecov.io](https://codecov.io/github/Microsoft/Mobius/coverage.svg?branch=master)](https://codecov.io/github/Microsoft/Mobius?branch=master)
## Getting Started
| |Windows |Linux |
|---|:------:|:----:|
|Build & run unit tests |[windows-instructions.md](notes/windows-instructions.md#building-sparkclr) |[linux-instructions.md](notes/linux-instructions.md#building-sparkclr) |
|Run samples (functional tests) in local mode |[windows-instructions.md](notes/windows-instructions.md#running-samples) |[linux-instructions.md](notes/linux-instructions.md#running-samples) |
|Run standalone examples in Client mode |[Quick-start wiki](https://github.com/Microsoft/SparkCLR/wiki/Quick-Start#client-mode) |[Quick-start wiki](https://github.com/Microsoft/SparkCLR/wiki/Quick-Start#client-mode) |
|Run standalone examples in Cluster mode |[Quick-start wiki](https://github.com/Microsoft/SparkCLR/wiki/Quick-Start#cluster-mode) |[Quick-start wiki](https://github.com/Microsoft/SparkCLR/wiki/Quick-Start#cluster-mode) |
|---|:------|:----|
|Build & run unit tests |[Build in Windows](notes/windows-instructions.md#building-mobius) |[Build in Linux](notes/linux-instructions.md#building-mobius-in-linux) |
|Run samples (functional tests) in local mode |[Samples in Windows](notes/windows-instructions.md#running-samples) |[Samples in Linux](notes/linux-instructions.md#running-mobius-samples-in-linux) |
|Run examples in local mode |[Examples in Windows](/notes/running-mobius-app.md#running-mobius-examples-in-local-mode) |[Examples in Linux](notes/linux-instructions.md#running-mobius-examples-in-linux) |
|Run Mobius app |<ul><li>[Standalone cluster](notes/running-mobius-app.md#standalone-cluster)</li><li>[YARN cluster](notes/running-mobius-app.md#yarn-cluster)</li></ul> |<ul><li>[Linux cluster](notes/linux-instructions.md#running-mobius-applications-in-linux)</li><li>[Azure HDInsight Spark Cluster](/notes/linux-instructions.md#mobius-in-azure-hdinsight-spark-cluster)</li><li>[AWS EMR Spark Cluster](/notes/linux-instructions.md#mobius-in-amazon-web-services-emr-spark-cluster)</li></ul> |
Note: Refer to [linux-compatibility.md](notes/linux-compatibility.md) for using SparkCLR with Spark on Linux
### Useful Links
* [Configuration parameters in Mobius](/notes/configuration-mobius.md)
* [Troubleshoot errors in Mobius](/notes/troubleshooting-mobius.md)
* [Debug Mobius apps](/notes/running-mobius-app.md#debug-mode)
## Supported Spark Versions
SparkCLR is built and tested with [Spark 1.4.1](https://github.com/Microsoft/SparkCLR/tree/branch-1.4), [Spark 1.5.2](https://github.com/Microsoft/SparkCLR/tree/branch-1.5) and [Spark 1.6.0](https://github.com/Microsoft/SparkCLR/tree/master).
Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsoft/Mobius/tree/branch-1.4), [1.5.2](https://github.com/Microsoft/Mobius/tree/branch-1.5) and [1.6.*](https://github.com/Microsoft/Mobius/tree/branch-1.6).
## Releases
Mobius releases are available at https://github.com/Microsoft/Mobius/releases. References needed to build a C# Spark driver application using Mobius are also available on [NuGet](https://www.nuget.org/packages/Microsoft.SparkCLR)
[![NuGet Badge](https://buildstats.info/nuget/Microsoft.SparkCLR)](https://www.nuget.org/packages/Microsoft.SparkCLR)
Refer to [mobius-release-info.md](notes/mobius-release-info.md) for the details on versioning policy and the contents of the release.
## License
[![License](https://img.shields.io/badge/license-MIT-blue.svg?style=plastic)](https://github.com/Microsoft/SparkCLR/blob/master/LICENSE)
[![License](https://img.shields.io/badge/license-MIT-blue.svg?style=plastic)](https://github.com/Microsoft/Mobius/blob/master/LICENSE)
SparkCLR is licensed under the MIT license. See [LICENSE](LICENSE) file for full license information.
Mobius is licensed under the MIT license. See [LICENSE](LICENSE) file for full license information.
## Community
[![Issue Stats](http://issuestats.com/github/Microsoft/SparkCLR/badge/pr)](http://issuestats.com/github/Microsoft/SparkCLR)
[![Issue Stats](http://issuestats.com/github/Microsoft/SparkCLR/badge/issue)](http://issuestats.com/github/Microsoft/SparkCLR)
[![Join the chat at https://gitter.im/Microsoft/SparkCLR](https://badges.gitter.im/Microsoft/SparkCLR.svg)](https://gitter.im/Microsoft/SparkCLR?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Issue Stats](http://issuestats.com/github/Microsoft/Mobius/badge/pr)](http://issuestats.com/github/Microsoft/Mobius)
[![Issue Stats](http://issuestats.com/github/Microsoft/Mobius/badge/issue)](http://issuestats.com/github/Microsoft/Mobius)
[![Join the chat at https://gitter.im/Microsoft/Mobius](https://badges.gitter.im/Microsoft/Mobius.svg)](https://gitter.im/Microsoft/Mobius?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Twitter](https://img.shields.io/twitter/url/http/twitter.com/MobiusForSpark.svg?style=social)](https://twitter.com/intent/tweet?text=@MobiusForSpark [your tweet] via @GitHub)
* SparkCLR project welcomes contributions. To contribute, follow the instructions in [CONTRIBUTING.md](notes/CONTRIBUTING.md)
* Mobius project welcomes contributions. To contribute, follow the instructions in [CONTRIBUTING.md](notes/CONTRIBUTING.md)
* Options to ask your question to the SparkCLR community
* create issue on [GitHub](https://github.com/Microsoft/SparkCLR)
* Options for asking questions to the Mobius community
* create issue on [GitHub](https://github.com/Microsoft/Mobius)
* create post with "sparkclr" tag in [Stack Overflow](https://stackoverflow.com/questions/tagged/sparkclr)
* send email to sparkclr-user@googlegroups.com
* join chat at [SparkCLR room in Gitter](https://gitter.im/Microsoft/SparkCLR)
* join chat at [Mobius room in Gitter](https://gitter.im/Microsoft/Mobius)
* tweet [@MobiusForSpark](http://twitter.com/MobiusForSpark)
## Code of Conduct
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.


@ -1,4 +1,4 @@
version: 1.6.0-SNAPSHOT.{build}
version: 1.6.2-SNAPSHOT.{build}
environment:
securefile:
@ -26,6 +26,7 @@ build_script:
- cmd: SET MAVEN_OPTS=-XX:MaxPermSize=2g -Xmx4g
- cmd: SET JAVA_OPTS=-XX:MaxPermSize=2g -Xmx4g
- cmd: SET MVN_QUIET=--quiet
- ps: if($env:APPVEYOR_REPO_TAG -eq $FALSE) {.\dev\scripts\SetSparkClrNugetPackageVersion.ps1 -nuspecDir .\csharp -version $env:APPVEYOR_BUILD_VERSION}
- cmd: cd .\build
- cmd: .\Build.cmd
- cmd: cd ..
@ -45,7 +46,7 @@ after_test:
- pip install codecov
- codecov -f "SparkCLRCodeCoverage.xml"
- cmd: cd .\build\localmode
- cmd: .\Runsamples.cmd --validate
- cmd: if not defined ProjectVersion (.\Runsamples.cmd --validate)
- cmd: cd ..\..
- cmd: dir csharp\Microsoft*.nupkg
- cmd: dir scala\target\spark-clr*.jar
@ -66,7 +67,15 @@ deploy:
- provider: NuGet # deploy to NuGet.org
api_key:
secure: TscZXMoOxrMfjR2TvGBns6b+IILWvo0WJpxikoGsMCqEcMj/x41Le1j8dHTCJMjI
skip_symbols: false # push symbols to SymbolSource.org
skip_symbols: false
artifact: /Microsoft.*\.nupkg/
on:
appveyor_repo_tag: true # deploy on tag push only
- provider: NuGet # deploy to MyGet.org
server: https://www.myget.org/F/mobiusforspark/api/v2/package
api_key:
secure: 1c6+PZ3zOdIgIy2y8rf1g/NfbcfoxwNcymNBUr1591mD3Ull2X32Qvw2QyCXqFka
skip_symbols: false
symbol_server: https://www.myget.org/F/mobiusforspark/api/v2/package
artifact: /Microsoft.*\.nupkg/


@ -38,14 +38,14 @@ if NOT EXIST "%SPARKCLR_HOME%\lib" mkdir "%SPARKCLR_HOME%\lib"
if NOT EXIST "%SPARKCLR_HOME%\samples" mkdir "%SPARKCLR_HOME%\samples"
if NOT EXIST "%SPARKCLR_HOME%\repl" mkdir "%SPARKCLR_HOME%\repl"
@echo Assemble SparkCLR Scala components
@echo Assemble Mobius Scala components
pushd "%CMDHOME%\..\scala"
@rem clean the target directory first
call mvn.cmd %MVN_QUIET% clean
@rem
@rem Note: Shade-plugin helps create an uber-package to simplify SparkCLR job submission;
@rem Note: Shade-plugin helps create an uber-package to simplify running samples during CI;
@rem however, it breaks debug mode in IntelliJ. So enable shade-plugin
@rem only in build.cmd to create the uber-package.
@rem
@ -80,19 +80,22 @@ IF "%APPVEYOR_REPO_TAG%" == "true" (goto :sign)
:mvndone
set MVN_ERRORLEVEL=%ERRORLEVEL%
@rem
@rem After uber package is created, restore Pom.xml
@rem
copy /y %temp%\pom.xml.original pom.xml
if %ERRORLEVEL% NEQ 0 (
@echo Build SparkCLR Scala components failed, stop building.
if %MVN_ERRORLEVEL% NEQ 0 (
@echo Build Mobius Scala components failed, stop building.
popd
goto :eof
)
@echo SparkCLR Scala binaries
copy /y target\spark*.jar "%SPARKCLR_HOME%\lib\"
@echo Mobius Scala binaries
@rem copy non-uber jar to runtime\lib folder
powershell -f ..\build\copyjar.ps1
popd
@REM Any .jar files under the lib directory will be copied to the staged runtime lib tree.
@ -105,7 +108,7 @@ if EXIST "%CMDHOME%\lib" (
)
:buildCSharp
@echo Assemble SparkCLR C# components
@echo Assemble Mobius C# components
pushd "%CMDHOME%\..\csharp"
@rem clean any possible previous build first
@ -113,20 +116,20 @@ call Clean.cmd
call Build.cmd
if %ERRORLEVEL% NEQ 0 (
@echo Build SparkCLR C# components failed, stop building.
@echo Build Mobius C# components failed, stop building.
popd
goto :eof
)
@echo SparkCLR C# binaries
@echo Mobius C# binaries
copy /y Worker\Microsoft.Spark.CSharp\bin\Release\* "%SPARKCLR_HOME%\bin\"
@echo SparkCLR C# Samples binaries
@echo Mobius C# Samples binaries
@rem need to include CSharpWorker.exe.config in samples folder
copy /y Worker\Microsoft.Spark.CSharp\bin\Release\* "%SPARKCLR_HOME%\samples\"
copy /y Samples\Microsoft.Spark.CSharp\bin\Release\* "%SPARKCLR_HOME%\samples\"
@echo SparkCLR Samples data
@echo Mobius Samples data
copy /y Samples\Microsoft.Spark.CSharp\data\* "%SPARKCLR_HOME%\data\"
@echo SparkCLR REPL
@ -135,7 +138,59 @@ copy /y Repl\bin\Release\* "%SPARKCLR_HOME%\repl\"
popd
@echo Assemble SparkCLR script components
@echo Download external dependencies
pushd "%CMDHOME%"
set DEPENDENCIES_DIR=dependencies
if NOT EXIST "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%
set DEPENDENCIES_HOME=%CMDHOME%\%DEPENDENCIES_DIR%
powershell -f localmode\downloadtools.ps1 dependencies
@echo Assemble dependencies
xcopy /e /y "%DEPENDENCIES_HOME%" "%SPARKCLR_HOME%\dependencies\"
@echo Assemble Mobius examples
pushd "%CMDHOME%\..\examples"
call Clean.cmd
call Build.cmd
if %ERRORLEVEL% NEQ 0 (
@echo Build Mobius .NET examples failed, stop building.
popd
goto :eof
)
set EXAMPLES_HOME=%CMDHOME%\examples
@echo set EXAMPLES_HOME=%EXAMPLES_HOME%
if EXIST "%EXAMPLES_HOME%" (
@echo Delete existing %EXAMPLES_HOME% ...
rd /s /q "%EXAMPLES_HOME%"
)
if NOT EXIST "%EXAMPLES_HOME%" mkdir "%EXAMPLES_HOME%"
set CURRDIR=%cd%
for /f "delims=" %%D in ('dir /b /s bin') do call :copyexamples %%D
goto :copyscripts
:copyexamples
set EXAMPLES_SRC=%1
set EXAMPLES_TARGET=%1
call set EXAMPLES_TARGET=%%EXAMPLES_TARGET:%CURRDIR%=%EXAMPLES_HOME%%%
set EXAMPLES_TARGET=%EXAMPLES_TARGET:~0,-4%
@echo mkdir %EXAMPLES_TARGET%
if NOT EXIST "%EXAMPLES_TARGET%" mkdir "%EXAMPLES_TARGET%"
REM 1. Copy dependencies from %SPARKCLR_HOME%\bin to use latest Mobius binaries
xcopy /y "%SPARKCLR_HOME%\bin\*" "%EXAMPLES_TARGET%"
REM 2. copy Examples APPs
xcopy /d /y "%EXAMPLES_SRC%\Release" "%EXAMPLES_TARGET%"
goto :eof
:copyscripts
popd
@echo Assemble Mobius script components
xcopy /e /y "%CMDHOME%\..\scripts" "%SPARKCLR_HOME%\scripts\"
@echo Make distribution
@ -148,10 +203,21 @@ if not defined ProjectVersion (
)
set SPARKCLR_NAME=spark-clr_2.10-%ProjectVersion%
@echo "%SPARKCLR_HOME%
@rem copy samples to top-level folder before zipping
@echo move /Y "%SPARKCLR_HOME%\samples" "%CMDHOME%"
move /Y %SPARKCLR_HOME%\samples %CMDHOME%
@echo move /Y "%SPARKCLR_HOME%\data" "%CMDHOME%\samples"
move /Y %SPARKCLR_HOME%\data %CMDHOME%\samples
@rem copy release info
@echo copy /Y "%CMDHOME%\..\notes\mobius-release-info.md"
copy /Y "%CMDHOME%\..\notes\mobius-release-info.md"
@rem Create the zip file
@echo 7z a .\target\%SPARKCLR_NAME%.zip runtime localmode ..\examples
7z a .\target\%SPARKCLR_NAME%.zip runtime localmode ..\examples
@echo 7z a .\target\%SPARKCLR_NAME%.zip runtime examples samples mobius-release-info.md
7z a .\target\%SPARKCLR_NAME%.zip runtime examples samples mobius-release-info.md
:distdone
popd


@ -18,14 +18,14 @@ fi
[ ! -d "$SPARKCLR_HOME/samples" ] && mkdir "$SPARKCLR_HOME/samples"
[ ! -d "$SPARKCLR_HOME/scripts" ] && mkdir "$SPARKCLR_HOME/scripts"
echo "Assemble SparkCLR Scala components"
echo "Assemble Mobius Scala components"
pushd "$FWDIR/../scala"
# clean the target directory first
mvn clean -q
[ $? -ne 0 ] && exit 1
# Note: Shade-plugin helps create an uber-package to simplify SparkCLR job submission;
# Note: Shade-plugin helps create an uber-package to simplify running samples during CI;
# however, it breaks debug mode in IntelliJ. So enable shade-plugin
# only in build.cmd to create the uber-package.
# build the package
@ -33,11 +33,11 @@ mvn package -Puber-jar -q
if [ $? -ne 0 ]
then
echo "Build SparkCLR Scala components failed, stop building."
echo "Build Mobius Scala components failed, stop building."
popd
exit 1
fi
echo "SparkCLR Scala binaries"
echo "Mobius Scala binaries"
cp target/spark*.jar "$SPARKCLR_HOME/lib/"
popd
@ -52,7 +52,7 @@ then
done
fi
echo "Assemble SparkCLR C# components"
echo "Assemble Mobius C# components"
pushd "$FWDIR/../csharp"
# clean any possible previous build first
@ -62,23 +62,37 @@ pushd "$FWDIR/../csharp"
if [ $? -ne 0 ];
then
echo "Build SparkCLR C# components failed, stop building."
echo "Build Mobius C# components failed, stop building."
popd
exit 1
fi
echo "SparkCLR C# binaries"
echo "Mobius C# binaries"
cp Worker/Microsoft.Spark.CSharp/bin/Release/* "$SPARKCLR_HOME/bin/"
echo "SparkCLR C# Samples binaries"
echo "Mobius C# Samples binaries"
# need to include CSharpWorker.exe.config in samples folder
cp Worker/Microsoft.Spark.CSharp/bin/Release/* "$SPARKCLR_HOME/samples/"
cp Samples/Microsoft.Spark.CSharp/bin/Release/* "$SPARKCLR_HOME/samples/"
echo "SparkCLR Samples data"
echo "Mobius Samples data"
cp Samples/Microsoft.Spark.CSharp/data/* "$SPARKCLR_HOME/data/"
popd
echo "Assemble SparkCLR script components"
echo "Assemble Mobius examples"
pushd "$FWDIR/../examples"
# clean any possible previous build first
./clean.sh
./build.sh
if [ $? -ne 0 ];
then
echo "Build Mobius .NET Examples failed, stop building."
popd
exit 1
fi
popd
echo "Assemble Mobius script components"
pushd "$FWDIR/../scripts"
cp *.sh "$SPARKCLR_HOME/scripts/"
popd

build/copyjar.ps1 Executable file

@ -0,0 +1,43 @@
function Get-ScriptDirectory
{
$Invocation = (Get-Variable MyInvocation -Scope 1).Value;
if($Invocation.PSScriptRoot)
{
$Invocation.PSScriptRoot;
}
Elseif($Invocation.MyCommand.Path)
{
Split-Path $Invocation.MyCommand.Path
}
else
{
$Invocation.InvocationName.Substring(0,$Invocation.InvocationName.LastIndexOf("\"));
}
}
#
# main body of the script
# this script copies the jar file for the release
#
$scriptDir= Get-ScriptDirectory
write-output "Script directory: $scriptDir"
$destDir = "$scriptDir\runtime\lib"
write-output "Directory to which file will be copied to: $destDir"
pushd ..\scala\target
#non-uber jar has original prefix - this is the file that needs to be copied over
$files = get-childitem $configPath -filter "original*"
#only one file in $files
foreach($file in $files)
{
$sourceFileName = $file.Name
write-output "Name of the file to copy: $sourceFileName"
}
$pattern = "^original-(.*)"
$destFileName = $sourceFileName -replace $pattern,'$1'
write-output "Name of the file to use in destination: $destFileName"
copy-item $sourceFileName -Destination "$destDir\$destFileName"
popd


@ -28,7 +28,7 @@ if "%1" == "" (
@rem TODO: this check will fail if "--exe" only exists in the argument list of user application.
if "%1" == "--exe" (
set USER_EXE="true"
@echo [RunSamples.cmd] Run user specified application, instead of SparkCLR samples.
@echo [RunSamples.cmd] Run user specified application, instead of Mobius samples.
)
rem - shift the arguments and examine %1 again
@ -47,16 +47,14 @@ if "%precheck%" == "bad" (goto :EOF)
@rem
@rem setup Hadoop and Spark versions
@rem
set SPARK_VERSION=1.6.0
set SPARK_VERSION=1.6.2
set HADOOP_VERSION=2.6
@echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION%
@rem Windows 7/8/10 may not allow powershell scripts by default
powershell -Command Set-ExecutionPolicy -Scope CurrentUser -ExecutionPolicy Unrestricted
@rem download runtime dependencies
pushd "%CMDHOME%"
powershell -f downloadtools.ps1 run !VERBOSE!
@rem Windows 7/8/10 may not allow powershell scripts by default
powershell -ExecutionPolicy Unrestricted -File downloadtools.ps1 run !VERBOSE!
@echo [RunSamples.cmd] UpdateRuntime.cmd
type ..\tools\updateruntime.cmd
call ..\tools\updateruntime.cmd
@ -67,7 +65,12 @@ if defined ProjectVersion (
)
set SPARKCLR_HOME=%CMDHOME%\..\runtime
set SPARKCSV_JARS=
@rem spark-csv package and its dependency are required for DataFrame operations in Mobius
set SPARKCLR_EXT_PATH=%SPARKCLR_HOME%\dependencies
set SPARKCSV_JAR1PATH=%SPARKCLR_EXT_PATH%\spark-csv_2.10-1.3.0.jar
set SPARKCSV_JAR2PATH=%SPARKCLR_EXT_PATH%\commons-csv-1.1.jar
set SPARKCLR_EXT_JARS=%SPARKCSV_JAR1PATH%,%SPARKCSV_JAR2PATH%
@rem RunSamples.cmd runs in local mode and should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir.
set HADOOP_CONF_DIR=
@ -81,7 +84,7 @@ set SAMPLES_DIR=%SPARKCLR_HOME%\samples
@echo [RunSamples.cmd] JAVA_HOME=%JAVA_HOME%
@echo [RunSamples.cmd] SPARK_HOME=%SPARK_HOME%
@echo [RunSamples.cmd] SPARKCLR_HOME=%SPARKCLR_HOME%
@echo [RunSamples.cmd] SPARKCSV_JARS=%SPARKCSV_JARS%
@echo [RunSamples.cmd] SPARKCLR_EXT_JARS=%SPARKCLR_EXT_JARS%
pushd "%SPARKCLR_HOME%\scripts"
@echo [RunSamples.cmd] CWD=
@ -93,8 +96,8 @@ if !INTERACTIVE! == "interactive" (
call sparkclr-repl.cmd
) else (
if "!USER_EXE!"=="" (
@echo [RunSamples.cmd] call sparkclr-submit.cmd --exe SparkCLRSamples.exe %SAMPLES_DIR% spark.local.dir %TEMP_DIR% sparkclr.sampledata.loc %SPARKCLR_HOME%\data %*
call sparkclr-submit.cmd --exe SparkCLRSamples.exe %SAMPLES_DIR% spark.local.dir %TEMP_DIR% sparkclr.sampledata.loc %SPARKCLR_HOME%\data %*
@echo [RunSamples.cmd] call sparkclr-submit.cmd --jars %SPARKCLR_EXT_JARS% --exe SparkCLRSamples.exe %SAMPLES_DIR% spark.local.dir %TEMP_DIR% sparkclr.sampledata.loc %SPARKCLR_HOME%\data %*
call sparkclr-submit.cmd --jars %SPARKCLR_EXT_JARS% --exe SparkCLRSamples.exe %SAMPLES_DIR% spark.local.dir %TEMP_DIR% sparkclr.sampledata.loc %SPARKCLR_HOME%\data %*
) else (
@echo [RunSamples.cmd] call sparkclr-submit.cmd %*
call sparkclr-submit.cmd %*


@ -12,7 +12,7 @@ if ($stage.ToLower() -eq "run")
$hadoopVersion = if ($envValue -eq $null) { "2.6" } else { $envValue }
$envValue = [Environment]::GetEnvironmentVariable("SPARK_VERSION")
$sparkVersion = if ($envValue -eq $null) { "1.6.0" } else { $envValue }
$sparkVersion = if ($envValue -eq $null) { "1.6.1" } else { $envValue }
Write-Output "[downloadtools] hadoopVersion=$hadoopVersion, sparkVersion=$sparkVersion"
}
@ -65,6 +65,7 @@ function Replace-VariableInFile($variable, $value, $sourceFile, $targetFile)
function Download-File($url, $output)
{
$output = [System.IO.Path]::GetFullPath($output)
if (test-path $output)
{
Write-Output "[downloadtools.Download-File] $output exists. No need to download."
@ -83,7 +84,13 @@ function Download-File($url, $output)
-SourceIdentifier Web.DownloadProgressChanged -Action {
$Global:Data = $event
}
$wc.DownloadFileAsync($url, $output)
$tmpOutput = $output + ".tmp.download"
if (test-path $tmpOutput) {
Remove-Item $tmpOutput
}
$wc.DownloadFileAsync($url, $tmpOutput)
While (!($Global:downloadComplete)) {
$percent = $Global:Data.SourceArgs.ProgressPercentage
$totalBytes = $Global:Data.SourceArgs.TotalBytesToReceive
@ -92,6 +99,8 @@ function Download-File($url, $output)
Write-Progress -Activity ("Downloading file to {0} from {1}" -f $output,$url) -Status ("{0} bytes \ {1} bytes" -f $receivedBytes,$totalBytes) -PercentComplete $percent
}
}
Rename-Item $tmpOutput -NewName $output
Write-Progress -Activity ("Downloading file to {0} from {1}" -f $output, $url) -Status ("{0} bytes \ {1} bytes" -f $receivedBytes,$totalBytes) -Completed
Unregister-Event -SourceIdentifier Web.DownloadFileCompleted
Unregister-Event -SourceIdentifier Web.DownloadProgressChanged
@ -213,11 +222,11 @@ function Download-BuildTools
}
# Apache Maven
$mvnVer = "apache-maven-3.3.3"
$mvnVer = "apache-maven-3.3.9"
$mvnCmd = "$toolsDir\$mvnVer\bin\mvn.cmd"
if (!(test-path $mvnCmd))
{
$url = "http://www.us.apache.org/dist/maven/maven-3/3.3.3/binaries/$mvnVer-bin.tar.gz"
$url = "http://www.us.apache.org/dist/maven/maven-3/3.3.9/binaries/$mvnVer-bin.tar.gz"
$output="$toolsDir\$mvnVer-bin.tar.gz"
Download-File $url $output
Untar-File $output $toolsDir
@ -257,7 +266,7 @@ function Download-BuildTools
$gpgZip = "$toolsDir\gpg4win-vanilla-2.3.0.zip"
if (!(test-path $gpgZip))
{
$url = "https://github.com/SparkCLR/build/blob/master/tools/gpg4win-vanilla-2.3.0.zip?raw=true"
$url = "https://github.com/MobiusForSpark/build/blob/master/tools/gpg4win-vanilla-2.3.0.zip?raw=true"
$output=$gpgZip
Download-File $url $output
# Unzip-File $output $toolsDir
@ -280,6 +289,39 @@ function Download-BuildTools
$envStream.close()
}
function Download-ExternalDependencies
{
$readMeStream = [System.IO.StreamWriter] "$scriptDir\..\dependencies\ReadMe.txt"
$readMeStream.WriteLine("The files in this folder are dependencies of Mobius Project")
$readMeStream.WriteLine("Refer to the following download locations for details on the jars like POM file, license etc.")
$readMeStream.WriteLine("")
$readMeStream.WriteLine("------------ Dependencies for CSV parsing in Mobius DataFrame API -----------------------------")
# Downloading spark-csv package and its dependency. These packages are required for DataFrame operations in Mobius
$url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.3.0/spark-csv_2.10-1.3.0.jar"
$output="$scriptDir\..\dependencies\spark-csv_2.10-1.3.0.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")
$url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.1/commons-csv-1.1.jar"
$output="$scriptDir\..\dependencies\commons-csv-1.1.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")
$readMeStream.WriteLine("")
$readMeStream.WriteLine("------------ Dependencies for Kafka-based processing in Mobius Streaming API -----------------------------")
$url = "http://search.maven.org/remotecontent?filepath=org/apache/spark/spark-streaming-kafka-assembly_2.10/1.6.1/spark-streaming-kafka-assembly_2.10-1.6.1.jar"
$output="$scriptDir\..\dependencies\spark-streaming-kafka-assembly_2.10-1.6.1.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")
$readMeStream.close()
return
}
function Download-RuntimeDependencies
{
# Create a cmd file to update environment variable
@ -340,7 +382,7 @@ function Download-RuntimeDependencies
$winutilsExe = "$winutilsBin\winutils.exe"
if (!(test-path $winutilsExe))
{
$url = "http://public-repo-1.hortonworks.com/hdp-win-alpha/winutils.exe"
$url = "https://github.com/MobiusForSpark/winutils/blob/master/hadoop-2.6.0/bin/winutils.exe?raw=true"
$output=$winutilsExe
Download-File $url $output
}
@ -480,8 +522,8 @@ function Print-Usage
Write-Output ''
Write-Output ' This script takes one input parameter ("stage"), which can be either [build | run].'
Write-Output ''
Write-Output ' Build: Download tools required in building SparkCLR;'
Write-Output ' Run: Download Apache Spark and related binaries, required to run SparkCLR samples locally.'
Write-Output ' Build: Download tools required in building Mobius;'
Write-Output ' Run: Download Apache Spark and related binaries, required to run Mobius samples locally.'
Write-Output ''
Write-Output '====================================================================================================='
}
@ -513,6 +555,10 @@ elseif ($stage.ToLower() -eq "run")
{
Download-RuntimeDependencies
}
elseif ($stage.ToLower() -eq "dependencies")
{
Download-ExternalDependencies
}
else
{
Print-Usage


@ -6,7 +6,7 @@ if not exist "%JAVA_HOME%\bin\java.exe" (
@echo.
@echo ============================================================================================
@echo.
@echo WARNING!!! %~nx0 detected JAVA_HOME is not set properly. SparkCLR requires JDK 7u85 and above,
@echo WARNING!!! %~nx0 detected JAVA_HOME is not set properly. Mobius requires JDK 7u85 and above,
@echo or JDK 8u60 and above. You can either download OpenJDK available at
@echo http://www.azul.com/downloads/zulu/zulu-windows/, or use Oracle JDK.
@echo.
@ -33,7 +33,7 @@ goto :eof
@echo ============================================================================================
@echo.
@echo WARNING!!! %~nx0 detected version of Visual Studio in current command prompt as %version%.
@echo SparkCLR %~nx0 requires "Developer Command Prompt for VS2013" and above, or
@echo Mobius %~nx0 requires "Developer Command Prompt for VS2013" and above, or
@echo "MSBuild Command Prompt for VS2015" and above.
@echo.
@echo ============================================================================================


@ -11,7 +11,7 @@ do
done
# setup Hadoop and Spark versions
export SPARK_VERSION=1.6.0
export SPARK_VERSION=1.6.2
export HADOOP_VERSION=2.6
echo "[run-samples.sh] SPARK_VERSION=$SPARK_VERSION, HADOOP_VERSION=$HADOOP_VERSION"
@ -27,18 +27,6 @@ if [ ! -d "$SPARK_HOME" ];
then
wget "http://www.us.apache.org/dist/spark/spark-$SPARK_VERSION/$SPARK.tgz" -O "$TOOLS_DIR/$SPARK.tgz"
tar xfz "$TOOLS_DIR/$SPARK.tgz" -C "$TOOLS_DIR"
# hack: use a customized spark
# TODO: fix the C# Worker
export SPARK_SRC="$TOOLS_DIR/spark-$SPARK_VERSION"
wget "http://www.us.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION.tgz" -O "$SPARK_SRC.tgz"
tar xfz "$SPARK_SRC.tgz" -C "$TOOLS_DIR"
pushd "$SPARK_SRC"
sed -i "s/val useDaemon = /val useDaemon = false \/\//g" "core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala"
build/mvn -Pyarn -Phadoop-$HADOOP_VERSION -DskipTests package 2>&1 | grep warn
[ $? -ne 0 ] && exit 1
cp assembly/target/scala-2.10/spark-assembly*hadoop*.jar "$SPARK_HOME/lib/"
popd
fi
export PATH="$SPARK_HOME/bin:$PATH"


@ -1,6 +1,9 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#
# This script takes in "dir" and "target" parameters, zips all files under dir to the target file
#
Param([string]$dir, [string]$target)
function Get-ScriptDirectory


@ -53,12 +53,7 @@
<Reference Include="System" />
<Reference Include="System.Configuration" />
<Reference Include="System.Core" />
<Reference Include="System.Runtime.Serialization" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Configuration\ConfigurationService.cs" />
@ -66,6 +61,7 @@
<Compile Include="Core\Accumulator.cs" />
<Compile Include="Core\Broadcast.cs" />
<Compile Include="Core\Option.cs" />
<Compile Include="Core\Partitioner.cs" />
<Compile Include="Core\RDDCollector.cs" />
<Compile Include="Core\DoubleRDDFunctions.cs" />
<Compile Include="Core\IRDDCollector.cs" />
@ -80,12 +76,17 @@
<Compile Include="Core\StatusTracker.cs" />
<Compile Include="Core\StorageLevel.cs" />
<Compile Include="Interop\Ipc\JsonSerDe.cs" />
<Compile Include="Interop\Ipc\JvmBridgeUtils.cs" />
<Compile Include="Interop\Ipc\WeakObjectManager.cs" />
<Compile Include="Interop\SparkCLREnvironment.cs" />
<Compile Include="Interop\Ipc\IJvmBridge.cs" />
<Compile Include="Interop\Ipc\JvmBridge.cs" />
<Compile Include="Interop\Ipc\JvmObjectReference.cs" />
<Compile Include="Interop\Ipc\PayloadHelper.cs" />
<Compile Include="Interop\Ipc\SerDe.cs" />
<Compile Include="Network\DefaultSocketWrapper.cs" />
<Compile Include="Network\ISocketWrapper.cs" />
<Compile Include="Network\SocketFactory.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Proxy\IDataFrameNaFunctionsProxy.cs" />
<Compile Include="Proxy\IDataFrameProxy.cs" />
@ -122,6 +123,7 @@
<Compile Include="Sql\DataFrameNaFunctions.cs" />
<Compile Include="Sql\DataFrameReader.cs" />
<Compile Include="Sql\DataFrameWriter.cs" />
<Compile Include="Sql\HiveContext.cs" />
<Compile Include="Sql\PythonSerDe.cs" />
<Compile Include="Sql\RowConstructor.cs" />
<Compile Include="Sql\Row.cs" />
@ -130,8 +132,11 @@
<Compile Include="Sql\SqlContext.cs" />
<Compile Include="Sql\Types.cs" />
<Compile Include="Sql\UserDefinedFunction.cs" />
<Compile Include="Streaming\ConstantInputDStream.cs" />
<Compile Include="Streaming\DStream.cs" />
<Compile Include="Streaming\EventHubsUtils.cs" />
<Compile Include="Streaming\Kafka.cs" />
<Compile Include="Streaming\MapWithStateDStream.cs" />
<Compile Include="Streaming\PairDStreamFunctions.cs" />
<Compile Include="Streaming\StreamingContext.cs" />
<Compile Include="Streaming\TransformedDStream.cs" />
@ -157,13 +162,7 @@
</Target>
-->
<Target Name="AfterBuild">
<XslTransformation
XslInputPath="..\documentation\DocFormatter.xsl"
XmlInputPaths="..\documentation\Microsoft.Spark.CSharp.Adapter.Doc.XML"
OutputPaths="..\documentation\SparkCLR_API_Documentation.md"
Condition="'$(OS)' == 'Windows_NT'" />
<Exec
Command="xsltproc -o ../documentation/SparkCLR_API_Documentation.md ../documentation/DocFormatter.xsl ../documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML"
Condition="'$(OS)' != 'Windows_NT'" />
<XslTransformation XslInputPath="..\documentation\DocFormatter.xsl" XmlInputPaths="..\documentation\Microsoft.Spark.CSharp.Adapter.Doc.XML" OutputPaths="..\documentation\Mobius_API_Documentation.md" Condition="'$(OS)' == 'Windows_NT'" />
<Exec Command="xsltproc -o ../documentation/Mobius_API_Documentation.md ../documentation/DocFormatter.xsl ../documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML" Condition="'$(OS)' != 'Windows_NT'" />
</Target>
</Project>


@ -2,15 +2,11 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Configuration
@ -91,7 +87,7 @@ namespace Microsoft.Spark.CSharp.Configuration
{
protected readonly AppSettingsSection appSettings;
protected readonly string sparkCLRHome = Environment.GetEnvironmentVariable(SPARKCLR_HOME); //set by sparkclr-submit.cmd
protected readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRConfiguration));
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRConfiguration));
internal SparkCLRConfiguration(System.Configuration.Configuration configuration)
{
@ -109,14 +105,36 @@ namespace Microsoft.Spark.CSharp.Configuration
throw new Exception("Environment variable " + CSHARPBACKEND_PORT + " not set");
}
logger.LogInfo("CSharpBackend successfully read from environment variable " + CSHARPBACKEND_PORT);
logger.LogInfo("CSharpBackend successfully read from environment variable {0}", CSHARPBACKEND_PORT);
return portNo;
}
private string workerPath;
/// <summary>
/// The path of the CSharp external backend worker process.
/// </summary>
internal virtual string GetCSharpWorkerExePath()
{
// SparkCLR jar and driver, worker & dependencies are shipped using Spark file server.
// These files are available in the Spark execution directory on the executor node.
if (workerPath != null) return workerPath; // Return cached value
var workerPathConfig = appSettings.Settings[CSharpWorkerPathSettingKey];
if (workerPathConfig == null)
{
workerPath = GetCSharpProcFileName();
}
else
{
// Explicit path for the CSharpWorker.exe was listed in App.config
workerPath = workerPathConfig.Value;
logger.LogDebug("Using CSharpWorkerPath value from App.config : {0}", workerPath);
}
return workerPath;
}
internal virtual string GetCSharpProcFileName()
{
return ProcFileName;
}
@ -124,50 +142,33 @@ namespace Microsoft.Spark.CSharp.Configuration
/// <summary>
/// Configuration for SparkCLR jobs in ** Local ** mode
/// Needs some investigation to find out why Local mode behaves
/// differently from standalone cluster mode for the configuration values
/// overridden here
/// </summary>
private class SparkCLRLocalConfiguration : SparkCLRConfiguration
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRLocalConfiguration));
internal SparkCLRLocalConfiguration(System.Configuration.Configuration configuration)
: base(configuration)
{ }
private string workerPath;
internal override string GetCSharpWorkerExePath()
internal override string GetCSharpProcFileName()
{
// SparkCLR jar and driver, worker & dependencies are shipped using Spark file server.
// These files are available in the Spark executing directory at executor node.
if (workerPath != null) return workerPath; // Return cached value
KeyValueConfigurationElement workerPathConfig = appSettings.Settings[CSharpWorkerPathSettingKey];
if (workerPathConfig == null)
{
// Path for the CSharpWorker.exe was not specified in App.config
// Try to work out where location relative to this class.
// Construct path based on well-known file name + directory this class was loaded from.
string procDir = Path.GetDirectoryName(GetType().Assembly.Location);
workerPath = Path.Combine(procDir, ProcFileName);
logger.LogDebug("Using synthesized value for CSharpWorkerPath : " + workerPath);
}
else
{
// Explicit path for the CSharpWorker.exe was listed in App.config
workerPath = workerPathConfig.Value;
logger.LogDebug("Using CSharpWorkerPath value from App.config : " + workerPath);
}
return workerPath;
// Path for the CSharpWorker.exe was not specified in App.config
// Try to work out its location relative to this class.
// Construct path based on well-known file name + directory this class was loaded from.
string procDir = Path.GetDirectoryName(GetType().Assembly.Location);
var procFilePath = Path.Combine(procDir, ProcFileName);
logger.LogDebug("Using SparkCLR Adapter dll path to construct CSharpWorkerPath : {0}", procFilePath);
return procFilePath;
}
}
/// <summary>
/// Configuration mode for debug mode
/// This configuration exists only to make SparkCLR development & debugging easier
/// This configuration exists only to make SparkCLR development and debugging easier
/// </summary>
private class SparkCLRDebugConfiguration : SparkCLRLocalConfiguration
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRDebugConfiguration));
internal SparkCLRDebugConfiguration(System.Configuration.Configuration configuration)
: base(configuration)
{}
@ -192,9 +193,14 @@ namespace Microsoft.Spark.CSharp.Configuration
KeyValueConfigurationElement workerPathConfig = appSettings.Settings[CSharpWorkerPathSettingKey];
if (workerPathConfig != null)
{
logger.LogInfo("Worker path read from setting {0} in app config", CSharpWorkerPathSettingKey);
return workerPathConfig.Value;
}
return GetSparkCLRArtifactsPath("bin", ProcFileName);
var path = GetSparkCLRArtifactsPath("bin", ProcFileName);
logger.LogInfo("Worker path {0} constructed using {1} environment variable", path, SPARKCLR_HOME);
return path;
}
private string GetSparkCLRArtifactsPath(string sparkCLRSubFolderName, string fileName)
@ -209,14 +215,31 @@ namespace Microsoft.Spark.CSharp.Configuration
}
}
/// <summary>
/// The running mode used by Configuration Service
/// </summary>
public enum RunMode
{
/// <summary>
/// Unknown running mode
/// </summary>
UNKNOWN,
DEBUG, //not a Spark mode but exists for dev debugging purpose
/// <summary>
/// Debug mode; not a Spark mode but exists for development and debugging purposes
/// </summary>
DEBUG,
/// <summary>
/// Indicates the service is running in local mode
/// </summary>
LOCAL,
/// <summary>
/// Indicates the service is running in cluster mode
/// </summary>
CLUSTER,
YARN,
//following are not currently supported
MESOS
/// <summary>
/// Indicates the service is running in YARN mode
/// </summary>
YARN
//MESOS //not currently supported
}
}


@ -12,6 +12,7 @@ using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using Microsoft.Spark.CSharp.Interop.Ipc;
using Microsoft.Spark.CSharp.Network;
using Microsoft.Spark.CSharp.Services;
[assembly: InternalsVisibleTo("CSharpWorker")]
@ -35,10 +36,26 @@ namespace Microsoft.Spark.CSharp.Core
{
internal static Dictionary<int, Accumulator> accumulatorRegistry = new Dictionary<int, Accumulator>();
[ThreadStatic] // thread safety is needed when running in the C# worker
internal static Dictionary<int, Accumulator> threadLocalAccumulatorRegistry = new Dictionary<int, Accumulator>();
/// <summary>
/// The identity of the accumulator
/// </summary>
protected int accumulatorId;
/// <summary>
/// Indicates whether the accumulator is on driver side.
/// When deserialized on worker side, isDriver is false by default.
/// </summary>
[NonSerialized]
protected bool deserialized = true;
protected bool isDriver = false;
}
/// <summary>
/// A generic version of <see cref="Accumulator"/> where the element type is specified by the driver program.
/// </summary>
/// <typeparam name="T">The type of element in the accumulator.</typeparam>
[Serializable]
public class Accumulator<T> : Accumulator
{
@ -46,20 +63,42 @@ namespace Microsoft.Spark.CSharp.Core
internal T value;
private readonly AccumulatorParam<T> accumulatorParam = new AccumulatorParam<T>();
/// <summary>
/// Initializes a new instance of the Accumulator class with a specified identity and a value.
/// </summary>
/// <param name="accumulatorId">The Identity of the accumulator</param>
/// <param name="value">The value of the accumulator</param>
public Accumulator(int accumulatorId, T value)
{
this.accumulatorId = accumulatorId;
this.value = value;
deserialized = false;
isDriver = true;
accumulatorRegistry[accumulatorId] = this;
}
[OnDeserialized()]
internal void OnDeserializedMethod(System.Runtime.Serialization.StreamingContext context)
{
if (threadLocalAccumulatorRegistry == null)
{
threadLocalAccumulatorRegistry = new Dictionary<int, Accumulator>();
}
if (!threadLocalAccumulatorRegistry.ContainsKey(accumulatorId))
{
threadLocalAccumulatorRegistry[accumulatorId] = this;
}
}
/// <summary>
/// Gets or sets the value of the accumulator; only usable in driver program
/// </summary>
/// <exception cref="ArgumentException"></exception>
public T Value
{
// Get the accumulator's value; only usable in driver program
get
{
if (deserialized)
if (!isDriver)
{
throw new ArgumentException("Accumulator.value cannot be accessed inside tasks");
}
@ -68,7 +107,7 @@ namespace Microsoft.Spark.CSharp.Core
// Sets the accumulator's value; only usable in driver program
set
{
if (deserialized)
if (!isDriver)
{
throw new ArgumentException("Accumulator.value cannot be accessed inside tasks");
}
@ -94,14 +133,14 @@ namespace Microsoft.Spark.CSharp.Core
/// <returns></returns>
public static Accumulator<T> operator +(Accumulator<T> self, T term)
{
if (!accumulatorRegistry.ContainsKey(self.accumulatorId))
{
accumulatorRegistry[self.accumulatorId] = self;
}
self.Add(term);
return self;
}
/// <summary>
/// Creates and returns a string representation of the current accumulator
/// </summary>
/// <returns>A string representation of the current accumulator</returns>
public override string ToString()
{
return string.Format("Accumulator<id={0}, value={1}>", accumulatorId, value);
@ -143,33 +182,33 @@ namespace Microsoft.Spark.CSharp.Core
/// A simple TCP server that intercepts shutdown() in order to interrupt
/// our continuous polling on the handler.
/// </summary>
internal class AccumulatorServer : System.Net.Sockets.TcpListener
internal class AccumulatorServer
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(AccumulatorServer));
private volatile bool serverShutdown;
private ISocketWrapper innerSocket;
internal AccumulatorServer()
: base(IPAddress.Loopback, 0)
{
innerSocket = SocketFactory.CreateSocket();
}
internal void Shutdown()
{
serverShutdown = true;
base.Stop();
innerSocket.Close();
}
internal int StartUpdateServer()
{
base.Start();
innerSocket.Listen();
Task.Run(() =>
{
try
{
IFormatter formatter = new BinaryFormatter();
using (Socket s = AcceptSocket())
using (var ns = new NetworkStream(s))
using (var s = innerSocket.Accept())
using (var ns = s.GetStream())
{
while (!serverShutdown)
{
@ -199,7 +238,7 @@ namespace Microsoft.Spark.CSharp.Core
}
catch (SocketException e)
{
if (e.ErrorCode != 10004) // A blocking operation was interrupted by a call to WSACancelBlockingCall - TcpListener.Stop cancelled AccepSocket as expected
if (e.ErrorCode != 10004) // A blocking operation was interrupted by a call to WSACancelBlockingCall - ISocketWrapper.Close canceled Accept() as expected
throw e;
}
catch (Exception e)
@ -209,7 +248,7 @@ namespace Microsoft.Spark.CSharp.Core
}
});
return (base.LocalEndpoint as IPEndPoint).Port;
return (innerSocket.LocalEndPoint as IPEndPoint).Port;
}
}
}
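The changes above separate driver-side and worker-side accumulator state (the isDriver flag and the thread-local registry). Below is a minimal usage sketch built only from the members visible in this diff; in a real Mobius program the accumulator would normally be obtained through the SparkContext rather than constructed directly:

``` c#
// Driver side: constructing the accumulator registers it and sets isDriver = true
var acc = new Accumulator<int>(accumulatorId: 1, value: 0);

// The overloaded + operator forwards to Add(term); this is how tasks update the accumulator
acc += 5;
acc += 7;

// Value is only readable where isDriver == true; inside a task it throws ArgumentException
Console.WriteLine(acc.Value);
Console.WriteLine(acc); // formatted as "Accumulator<id=..., value=...>" per ToString()
```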


@ -8,6 +8,7 @@ using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;
using System.Collections.Concurrent;
using Microsoft.Spark.CSharp.Proxy;
@ -30,13 +31,21 @@ namespace Microsoft.Spark.CSharp.Core
[Serializable]
public class Broadcast
{
/// <summary>
/// A thread-safe static collection that is used to store registered broadcast objects.
/// </summary>
[NonSerialized]
public static Dictionary<long, Broadcast> broadcastRegistry = new Dictionary<long, Broadcast>();
public static ConcurrentDictionary<long, Broadcast> broadcastRegistry = new ConcurrentDictionary<long, Broadcast>();
[NonSerialized]
internal string path;
internal long broadcastId;
internal Broadcast() { }
/// <summary>
/// Initializes a new instance of the Broadcast class with a specified path.
/// </summary>
/// <param name="path">The path to be set.</param>
public Broadcast(string path)
{
this.path = path;
@ -59,6 +68,11 @@ namespace Microsoft.Spark.CSharp.Core
}
}
}
/// <summary>
/// A generic version of <see cref="Broadcast"/> where the element can be specified.
/// </summary>
/// <typeparam name="T">The type of element in Broadcast</typeparam>
[Serializable]
public class Broadcast<T> : Broadcast
{


@ -9,6 +9,9 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// Extra functions available on RDDs of Doubles through an implicit conversion.
/// </summary>
public static class DoubleRDDFunctions
{
/// <summary>


@ -16,17 +16,32 @@ namespace Microsoft.Spark.CSharp.Core
private bool isDefined = false;
private T value;
/// <summary>
/// Initializes an instance of the Option class without any value.
/// </summary>
public Option()
{ }
/// <summary>
/// Initializes an instance of the Option class with a specific value.
/// </summary>
/// <param name="value">The value to be associated with the new instance.</param>
public Option(T value)
{
isDefined = true;
this.value = value;
}
/// <summary>
/// Indicates whether the option value is defined.
/// </summary>
public bool IsDefined { get { return isDefined; } }
/// <summary>
/// Returns the value of the option if Option.IsDefined is TRUE;
/// otherwise, throws an <see cref="ArgumentException"/>.
/// </summary>
/// <returns></returns>
public T GetValue()
{
if (isDefined) return value;

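A short usage sketch based on the Option members documented above (illustrative only):

``` c#
var some = new Option<int>(42);  // IsDefined == true
var none = new Option<int>();    // IsDefined == false

if (some.IsDefined)
{
    Console.WriteLine(some.GetValue()); // prints 42
}

// none.GetValue() would throw, since no value is defined (see the summary above)
```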

@ -1,6 +1,7 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using Microsoft.Spark.CSharp.Core;
using System;
using System.Collections.Generic;
using System.Linq;
@ -9,26 +10,15 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// Extra functions available on RDDs of (key, value) pairs where the key is sortable through
/// a function to sort the key.
/// </summary>
public static class OrderedRDDFunctions
{
/// <summary>
/// Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling
/// `collect` or `save` on the resulting RDD will return or output an ordered list of records
/// (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
/// order of the keys).
///
/// >>> tmp = [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)]
/// >>> sc.parallelize(tmp).sortByKey().first()
/// ('1', 3)
/// >>> sc.parallelize(tmp).sortByKey(True, 1).collect()
/// [('1', 3), ('2', 5), ('a', 1), ('b', 2), ('d', 4)]
/// >>> sc.parallelize(tmp).sortByKey(True, 2).collect()
/// [('1', 3), ('2', 5), ('a', 1), ('b', 2), ('d', 4)]
/// >>> tmp2 = [('Mary', 1), ('had', 2), ('a', 3), ('little', 4), ('lamb', 5)]
/// >>> tmp2.extend([('whose', 6), ('fleece', 7), ('was', 8), ('white', 9)])
/// >>> sc.parallelize(tmp2).sortByKey(True, 3, keyfunc=lambda k: k.lower()).collect()
/// [('a', 3), ('fleece', 7), ('had', 2), ('lamb', 5),...('white', 9), ('whose', 6)]
///
/// Sorts this RDD, which is assumed to consist of KeyValuePairs.
/// </summary>
/// <typeparam name="K"></typeparam>
/// <typeparam name="V"></typeparam>
@ -36,26 +26,75 @@ namespace Microsoft.Spark.CSharp.Core
/// <param name="ascending"></param>
/// <param name="numPartitions"></param>
/// <returns></returns>
public static RDD<KeyValuePair<K, V>> SortByKey<K, V>(
this RDD<KeyValuePair<K, V>> self,
bool ascending = true,
int? numPartitions = null)
public static RDD<KeyValuePair<K, V>> SortByKey<K, V>(this RDD<KeyValuePair<K, V>> self,
bool ascending = true, int? numPartitions = null)
{
throw new NotImplementedException();
return SortByKey<K, V, K>(self, ascending, numPartitions, new DefaultSortKeyFuncHelper<K>().Execute);
}
/// <summary>
/// Sorts this RDD, which is assumed to consist of KeyValuePairs. If the key is of type string, the sort is case sensitive.
/// </summary>
/// <typeparam name="K"></typeparam>
/// <typeparam name="V"></typeparam>
/// <typeparam name="U"></typeparam>
/// <param name="self"></param>
/// <param name="ascending"></param>
/// <param name="numPartitions">Number of partitions. Each partition of the sorted RDD contains a sorted range of the elements.</param>
/// <param name="keyFunc">RDD will sort by keyFunc(key) for every key in KeyValuePair. Must not be null.</param>
/// <returns></returns>
public static RDD<KeyValuePair<K, V>> SortByKey<K, V, U>(this RDD<KeyValuePair<K, V>> self,
bool ascending, int? numPartitions, Func<K, U> keyFunc)
{
if (keyFunc == null)
{
throw new ArgumentNullException("keyFunc cannot be null.");
}
if (numPartitions == null)
{
numPartitions = self.GetDefaultPartitionNum();
}
if (numPartitions == 1)
{
if (self.GetNumPartitions() > 1)
{
self = self.Coalesce(1);
}
return self.MapPartitionsWithIndex(new SortByKeyHelper<K, V, U>(keyFunc, ascending).Execute, true);
}
var rddSize = self.Count();
if (rddSize == 0) return self; // empty RDD
var maxSampleSize = numPartitions.Value * 20; // constant from Spark's RangePartitioner
double fraction = Math.Min((double)maxSampleSize / Math.Max(rddSize, 1), 1.0);
/* first compute the boundary of each part via sampling: we want to partition
* the key-space into bins such that the bins have roughly the same
* number of (key, value) pairs falling into them */
U[] samples = self.Sample(false, fraction, 1).Map(kv => kv.Key).Collect().Select(k => keyFunc(k)).ToArray();
Array.Sort(samples, StringComparer.Ordinal); // case sensitive if key type is string
List<U> bounds = new List<U>();
for (int i = 0; i < numPartitions - 1; i++)
{
bounds.Add(samples[(int)(samples.Length * (i + 1) / numPartitions)]);
}
return self.PartitionBy(numPartitions.Value,
new PairRDDFunctions.PartitionFuncDynamicTypeHelper<K>(
new RangePartitionerHelper<K, U>(numPartitions.Value, keyFunc, bounds, ascending).Execute)
.Execute)
.MapPartitionsWithIndex(new SortByKeyHelper<K, V, U>(keyFunc, ascending).Execute, true);
}
/// <summary>
/// Repartition the RDD according to the given partitioner and, within each resulting partition,
/// sort records by their keys.
///
/// This is more efficient than calling `repartition` and then sorting within each partition
/// because it can push the sorting down into the shuffle machinery.
///
/// >>> rdd = sc.parallelize([(0, 5), (3, 8), (2, 6), (0, 8), (3, 8), (1, 3)])
/// >>> rdd2 = rdd.repartitionAndSortWithinPartitions(2, lambda x: x % 2, 2)
/// >>> rdd2.glom().collect()
/// [[(0, 5), (0, 8), (2, 6)], [(1, 3), (3, 8), (3, 8)]]
///
/// </summary>
/// <typeparam name="K"></typeparam>
/// <typeparam name="V"></typeparam>
@ -72,5 +111,69 @@ namespace Microsoft.Spark.CSharp.Core
{
return self.MapPartitionsWithIndex<KeyValuePair<K, V>>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Key) : iter.OrderByDescending(kv => kv.Key));
}
[Serializable]
internal class SortByKeyHelper<K, V, U>
{
private readonly Func<K, U> func;
private readonly bool ascending;
public SortByKeyHelper(Func<K, U> f, bool ascending = true)
{
func = f;
this.ascending = ascending;
}
public IEnumerable<KeyValuePair<K, V>> Execute(int pid, IEnumerable<KeyValuePair<K, V>> kvs)
{
IEnumerable<KeyValuePair<K, V>> ordered;
if (ascending)
{
if (typeof(K) == typeof(string))
ordered = kvs.OrderBy(k => func(k.Key).ToString(), StringComparer.Ordinal);
else
ordered = kvs.OrderBy(k => func(k.Key));
}
else
{
if (typeof(K) == typeof(string))
ordered = kvs.OrderByDescending(k => func(k.Key).ToString(), StringComparer.Ordinal);
else
ordered = kvs.OrderByDescending(k => func(k.Key));
}
return ordered;
}
}
[Serializable]
internal class DefaultSortKeyFuncHelper<K>
{
public K Execute(K key) { return key; }
}
[Serializable]
internal class RangePartitionerHelper<K, U>
{
private readonly int numPartitions;
private readonly Func<K, U> keyFunc;
private readonly List<U> bounds;
private readonly bool ascending;
public RangePartitionerHelper(int numPartitions, Func<K, U> keyFunc, List<U> bounds, bool ascending)
{
this.numPartitions = numPartitions;
this.bounds = bounds;
this.keyFunc = keyFunc;
this.ascending = ascending;
}
public int Execute(K key)
{
// Binary search the insert position in the bounds. If the key is found, the insert position is returned; if not, a negative
// number that is the bitwise complement of the insert position is returned, so we take its bitwise complement.
var pos = bounds.BinarySearch(keyFunc(key));
if (pos < 0) pos = ~pos;
return ascending ? pos : numPartitions - 1 - pos;
}
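// Minimal sketch of the List<T>.BinarySearch contract relied on above (hypothetical values):
var sampleBounds = new List<int> { 10, 20, 30 };
int found = sampleBounds.BinarySearch(20);    // 1: the key exists, its index is returned
int missing = sampleBounds.BinarySearch(25);  // negative: bitwise complement of the insertion point
int insertionPoint = ~missing;                // 2: index of the first bound greater than 25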
}
}
}

View file

@ -8,6 +8,7 @@ using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.IO;
using System.Security.Cryptography;
using Microsoft.Spark.CSharp.Interop.Ipc;
namespace Microsoft.Spark.CSharp.Core
{
@ -21,7 +22,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <summary>
/// Return the key-value pairs in this RDD to the master as a dictionary.
///
/// var m = sc.Parallelize(new[] { new <see cref="KeyValuePair{int, int}"/>(1, 2), new <see cref="KeyValuePair{int, int}"/>(3, 4) }, 1).CollectAsMap()
/// var m = sc.Parallelize(new[] { new KeyValuePair&lt;int, int>(1, 2), new KeyValuePair&lt;int, int>(3, 4) }, 1).CollectAsMap()
/// m[1]
/// 2
/// m[3]
@ -40,7 +41,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <summary>
/// Return an RDD with the keys of each tuple.
///
/// >>> m = sc.Parallelize(new[] { new <see cref="KeyValuePair{int, int}"/>(1, 2), new <see cref="KeyValuePair{int, int}"/>(3, 4) }, 1).Keys().Collect()
/// >>> m = sc.Parallelize(new[] { new KeyValuePair&lt;int, int>(1, 2), new KeyValuePair&lt;int, int>(3, 4) }, 1).Keys().Collect()
/// [1, 3]
/// </summary>
/// <typeparam name="K"></typeparam>
@ -55,7 +56,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <summary>
/// Return an RDD with the values of each tuple.
///
/// >>> m = sc.Parallelize(new[] { new <see cref="KeyValuePair{int, int}"/>(1, 2), new <see cref="KeyValuePair{int, int}"/>(3, 4) }, 1).Values().Collect()
/// >>> m = sc.Parallelize(new[] { new KeyValuePair&lt;int, int>(1, 2), new KeyValuePair&lt;int, int>(3, 4) }, 1).Values().Collect()
/// [2, 4]
///
/// </summary>
@ -79,9 +80,9 @@ namespace Microsoft.Spark.CSharp.Core
///
/// sc.Parallelize(new[]
/// {
/// new <see cref="KeyValuePair{string, int}"/>("a", 1),
/// new <see cref="KeyValuePair{string, int}"/>("b", 1),
/// new <see cref="KeyValuePair{string, int}"/>("a", 1)
/// new KeyValuePair&lt;string, int>("a", 1),
/// new KeyValuePair&lt;string, int>("b", 1),
/// new KeyValuePair&lt;string, int>("a", 1)
/// }, 2)
/// .ReduceByKey((x, y) => x + y).Collect()
///
@ -108,9 +109,9 @@ namespace Microsoft.Spark.CSharp.Core
///
/// sc.Parallelize(new[]
/// {
/// new <see cref="KeyValuePair{string, int}"/>("a", 1),
/// new <see cref="KeyValuePair{string, int}"/>("b", 1),
/// new <see cref="KeyValuePair{string, int}"/>("a", 1)
/// new KeyValuePair&lt;string, int>("a", 1),
/// new KeyValuePair&lt;string, int>("b", 1),
/// new KeyValuePair&lt;string, int>("a", 1)
/// }, 2)
/// .ReduceByKeyLocally((x, y) => x + y).Collect()
///
@ -132,9 +133,9 @@ namespace Microsoft.Spark.CSharp.Core
///
/// sc.Parallelize(new[]
/// {
/// new <see cref="KeyValuePair{string, int}"/>("a", 1),
/// new <see cref="KeyValuePair{string, int}"/>("b", 1),
/// new <see cref="KeyValuePair{string, int}"/>("a", 1)
/// new KeyValuePair&lt;string, int>("a", 1),
/// new KeyValuePair&lt;string, int>("b", 1),
/// new KeyValuePair&lt;string, int>("a", 1)
/// }, 2)
/// .CountByKey((x, y) => x + y).Collect()
///
@ -158,9 +159,9 @@ namespace Microsoft.Spark.CSharp.Core
/// Performs a hash join across the cluster.
///
/// var l = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 1), new <see cref="KeyValuePair{string, int}"/>("b", 4) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 1), new KeyValuePair&lt;string, int>("b", 4) }, 1);
/// var r = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 2), new <see cref="KeyValuePair{string, int}"/>("a", 3) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 2), new KeyValuePair&lt;string, int>("a", 3) }, 1);
/// var m = l.Join(r, 2).Collect();
///
/// [('a', (1, 2)), ('a', (1, 3))]
@ -193,9 +194,9 @@ namespace Microsoft.Spark.CSharp.Core
/// Hash-partitions the resulting RDD into the given number of partitions.
///
/// var l = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 1), new <see cref="KeyValuePair{string, int}"/>("b", 4) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 1), new KeyValuePair&lt;string, int>("b", 4) }, 1);
/// var r = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 2) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 2) }, 1);
/// var m = l.LeftOuterJoin(r).Collect();
///
/// [('a', (1, 2)), ('b', (4, Option))]
@ -227,9 +228,9 @@ namespace Microsoft.Spark.CSharp.Core
/// Hash-partitions the resulting RDD into the given number of partitions.
///
/// var l = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 2) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 2) }, 1);
/// var r = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 1), new <see cref="KeyValuePair{string, int}"/>("b", 4) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 1), new KeyValuePair&lt;string, int>("b", 4) }, 1);
/// var m = l.RightOuterJoin(r).Collect();
///
/// [('a', (2, 1)), ('b', (Option, 4))]
@ -266,9 +267,9 @@ namespace Microsoft.Spark.CSharp.Core
/// Hash-partitions the resulting RDD into the given number of partitions.
///
/// var l = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 1), <see cref="KeyValuePair{string, int}"/>("b", 4) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 1), KeyValuePair&lt;string, int>("b", 4) }, 1);
/// var r = sc.Parallelize(
/// new[] { new <see cref="KeyValuePair{string, int}"/>("a", 2), new <see cref="KeyValuePair{string, int}"/>("c", 8) }, 1);
/// new[] { new KeyValuePair&lt;string, int>("a", 2), new KeyValuePair&lt;string, int>("c", 8) }, 1);
/// var m = l.FullOuterJoin(r).Collect();
///
/// [('a', (1, 2)), ('b', (4, None)), ('c', (None, 8))]
@ -294,30 +295,31 @@ namespace Microsoft.Spark.CSharp.Core
/// <summary>
/// Return a copy of the RDD partitioned using the specified partitioner.
///
/// sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new <see cref="KeyValuePair{int, int}"/>(x, x)).PartitionBy(3).Glom().Collect()
/// sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new KeyValuePair&lt;int, int>(x, x)).PartitionBy(3).Glom().Collect()
/// </summary>
/// <param name="self"></param>
/// <param name="numPartitions"></param>
/// <param name="partitionFunc"></param>
/// <returns></returns>
public static RDD<KeyValuePair<K, V>> PartitionBy<K, V>(this RDD<KeyValuePair<K, V>> self, int numPartitions = 0)
public static RDD<KeyValuePair<K, V>> PartitionBy<K, V>(this RDD<KeyValuePair<K, V>> self, int numPartitions = 0,
Func<dynamic, int> partitionFunc = null)
{
if (numPartitions == 0)
{
numPartitions = self.sparkContext.SparkConf.SparkConfProxy.GetInt("spark.default.parallelism", 0);
if (numPartitions == 0 && self.previousRddProxy != null)
numPartitions = self.previousRddProxy.PartitionLength();
numPartitions = self.GetDefaultPartitionNum();
}
int? partitioner = numPartitions;
if (self.partitioner == partitioner)
var partitioner = new Partitioner(numPartitions, partitionFunc);
if (self.partitioner != null && self.partitioner.Equals(partitioner))
return self;
var keyed = self.MapPartitionsWithIndex(new AddShuffleKeyHelper<K, V>().Execute, true);
var keyed = self.MapPartitionsWithIndex(new AddShuffleKeyHelper<K, V>(numPartitions, partitionFunc).Execute, true);
keyed.bypassSerializer = true;
// convert shuffling version of RDD[(Long, Array[Byte])] back to normal RDD[Array[Byte]]
// invoking property keyed.RddProxy marks the end of the current pipeline RDD after shuffling
// and potentially starts the next pipeline RDD with the default SerializedMode.Byte
var rdd = new RDD<KeyValuePair<K, V>>(self.sparkContext.SparkContextProxy.CreatePairwiseRDD(keyed.RddProxy, numPartitions), self.sparkContext);
var rdd = new RDD<KeyValuePair<K, V>>(self.sparkContext.SparkContextProxy.CreatePairwiseRDD(keyed.RddProxy, numPartitions,
GenerateObjectId(partitionFunc)), self.sparkContext);
rdd.partitioner = partitioner;
return rdd;
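// Illustrative usage sketch of the partitionFunc parameter introduced above (hypothetical
// driver-side snippet; assumes a SparkContext named sc as in the XML doc samples in this file):
// route even keys to partition 0 and odd keys to partition 1 instead of hashing the serialized key.
var customPartitioned = sc.Parallelize(new[]
{
    new KeyValuePair<int, string>(1, "a"),
    new KeyValuePair<int, string>(2, "b"),
    new KeyValuePair<int, string>(3, "c")
}, 1)
.PartitionBy(2, key => (int)key % 2);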
@ -344,9 +346,9 @@ namespace Microsoft.Spark.CSharp.Core
/// sc.Parallelize(
/// new[]
/// {
/// new <see cref="KeyValuePair{string, int}"/>("a", 1),
/// new <see cref="KeyValuePair{string, int}"/>("b", 1),
/// new <see cref="KeyValuePair{string, int}"/>("a", 1)
/// new KeyValuePair&lt;string, int>("a", 1),
/// new KeyValuePair&lt;string, int>("b", 1),
/// new KeyValuePair&lt;string, int>("a", 1)
/// }, 2)
/// .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect()
///
@ -387,9 +389,9 @@ namespace Microsoft.Spark.CSharp.Core
/// sc.Parallelize(
/// new[]
/// {
/// new <see cref="KeyValuePair{string, int}"/>("a", 1),
/// new <see cref="KeyValuePair{string, int}"/>("b", 1),
/// new <see cref="KeyValuePair{string, int}"/>("a", 1)
/// new KeyValuePair&lt;string, int>("a", 1),
/// new KeyValuePair&lt;string, int>("b", 1),
/// new KeyValuePair&lt;string, int>("a", 1)
/// }, 2)
/// .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect()
///
@ -423,9 +425,9 @@ namespace Microsoft.Spark.CSharp.Core
/// sc.Parallelize(
/// new[]
/// {
/// new <see cref="KeyValuePair{string, int}"/>("a", 1),
/// new <see cref="KeyValuePair{string, int}"/>("b", 1),
/// new <see cref="KeyValuePair{string, int}"/>("a", 1)
/// new KeyValuePair&lt;string, int>("a", 1),
/// new KeyValuePair&lt;string, int>("b", 1),
/// new KeyValuePair&lt;string, int>("a", 1)
/// }, 2)
/// .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect()
///
@ -458,9 +460,9 @@ namespace Microsoft.Spark.CSharp.Core
/// sc.Parallelize(
/// new[]
/// {
/// new <see cref="KeyValuePair{string, int}"/>("a", 1),
/// new <see cref="KeyValuePair{string, int}"/>("b", 1),
/// new <see cref="KeyValuePair{string, int}"/>("a", 1)
/// new KeyValuePair&lt;string, int>("a", 1),
/// new KeyValuePair&lt;string, int>("b", 1),
/// new KeyValuePair&lt;string, int>("a", 1)
/// }, 2)
/// .GroupByKey().MapValues(l => string.Join(" ", l)).Collect()
///
@ -488,8 +490,8 @@ namespace Microsoft.Spark.CSharp.Core
/// sc.Parallelize(
/// new[]
/// {
/// new <see cref="KeyValuePair{string, string[]}"/>("a", new[]{"apple", "banana", "lemon"}),
/// new <see cref="KeyValuePair{string, string[]}"/>("b", new[]{"grapes"})
/// new KeyValuePair&lt;string, string[]>("a", new[]{"apple", "banana", "lemon"}),
/// new KeyValuePair&lt;string, string[]>("b", new[]{"grapes"})
/// }, 2)
/// .MapValues(x => x.Length).Collect()
///
@ -514,8 +516,8 @@ namespace Microsoft.Spark.CSharp.Core
/// x = sc.Parallelize(
/// new[]
/// {
/// new <see cref="KeyValuePair{string, string[]}"/>("a", new[]{"x", "y", "z"}),
/// new <see cref="KeyValuePair{string, string[]}"/>("b", new[]{"p", "r"})
/// new KeyValuePair&lt;string, string[]>("a", new[]{"x", "y", "z"}),
/// new KeyValuePair&lt;string, string[]>("b", new[]{"p", "r"})
/// }, 2)
/// .FlatMapValues(x => x).Collect()
///
@ -534,7 +536,7 @@ namespace Microsoft.Spark.CSharp.Core
}
/// <summary>
/// explicitly convert KeyValuePair<K, V> to KeyValuePair<K, dynamic>
/// explicitly convert KeyValuePair&lt;K, V> to KeyValuePair&lt;K, dynamic>
/// since they are incompatible types, unlike V to dynamic
/// </summary>
/// <typeparam name="K"></typeparam>
@ -566,8 +568,8 @@ namespace Microsoft.Spark.CSharp.Core
/// For each key k in this RDD or <paramref name="other"/>, return a resulting RDD that
/// contains a tuple with the list of values for that key in this RDD as well as <paramref name="other"/>.
///
/// var x = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 1), new <see cref="KeyValuePair{string, int}"/>("b", 4) }, 2);
/// var y = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 2) }, 1);
/// var x = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 1), new KeyValuePair&lt;string, int>("b", 4) }, 2);
/// var y = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 2) }, 1);
/// x.GroupWith(y).Collect();
///
/// [('a', ([1], [2])), ('b', ([4], []))]
@ -608,9 +610,9 @@ namespace Microsoft.Spark.CSharp.Core
}
/// <summary>
/// var x = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 5), new <see cref="KeyValuePair{string, int}"/>("b", 6) }, 2);
/// var y = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 1), new <see cref="KeyValuePair{string, int}"/>("b", 4) }, 2);
/// var z = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 2) }, 1);
/// var x = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 5), new KeyValuePair&lt;string, int>("b", 6) }, 2);
/// var y = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 1), new KeyValuePair&lt;string, int>("b", 4) }, 2);
/// var z = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 2) }, 1);
/// x.GroupWith(y, z).Collect();
/// </summary>
/// <typeparam name="K"></typeparam>
@ -653,10 +655,10 @@ namespace Microsoft.Spark.CSharp.Core
}
/// <summary>
/// var x = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 5), new <see cref="KeyValuePair{string, int}"/>("b", 6) }, 2);
/// var y = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 1), new <see cref="KeyValuePair{string, int}"/>("b", 4) }, 2);
/// var z = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("a", 2) }, 1);
/// var w = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int}"/>("b", 42) }, 1);
/// var x = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 5), new KeyValuePair&lt;string, int>("b", 6) }, 2);
/// var y = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 1), new KeyValuePair&lt;string, int>("b", 4) }, 2);
/// var z = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("a", 2) }, 1);
/// var w = sc.Parallelize(new[] { new KeyValuePair&lt;string, int>("b", 42) }, 1);
/// var m = x.GroupWith(y, z, w).MapValues(l => string.Join(" ", l.Item1) + " : " + string.Join(" ", l.Item2) + " : " + string.Join(" ", l.Item3) + " : " + string.Join(" ", l.Item4)).Collect();
/// </summary>
/// <typeparam name="K"></typeparam>
@ -711,7 +713,7 @@ namespace Microsoft.Spark.CSharp.Core
// ///
// /// var fractions = new <see cref="Dictionary{string, double}"/> { { "a", 0.2 }, { "b", 0.1 } };
// /// var rdd = sc.Parallelize(fractions.Keys.ToArray(), 2).Cartesian(sc.Parallelize(Enumerable.Range(0, 1000), 2));
// /// var sample = rdd.Map(t => new <see cref="KeyValuePair{string, int}"/>(t.Item1, t.Item2)).SampleByKey(false, fractions, 2).GroupByKey().Collect();
// /// var sample = rdd.Map(t => new KeyValuePair&lt;string, int>(t.Item1, t.Item2)).SampleByKey(false, fractions, 2).GroupByKey().Collect();
// ///
// /// 100 &lt; sample["a"].Length &lt; 300 and 50 &lt; sample["b"].Length &lt; 150
// /// true
@ -743,8 +745,8 @@ namespace Microsoft.Spark.CSharp.Core
/// <summary>
/// Return each (key, value) pair in this RDD that has no pair with matching key in <paramref name="other"/>.
///
/// var x = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int?}"/>("a", 1), new <see cref="KeyValuePair{string, int?}"/>("b", 4), new <see cref="KeyValuePair{string, int?}"/>("b", 5), new <see cref="KeyValuePair{string, int?}"/>("a", 2) }, 2);
/// var y = sc.Parallelize(new[] { new <see cref="KeyValuePair{string, int?}"/>("a", 3), new <see cref="KeyValuePair{string, int?}"/>("c", null) }, 2);
/// var x = sc.Parallelize(new[] { new KeyValuePair&lt;string, int?>("a", 1), new KeyValuePair&lt;string, int?>("b", 4), new KeyValuePair&lt;string, int?>("b", 5), new KeyValuePair&lt;string, int?>("a", 2) }, 2);
/// var y = sc.Parallelize(new[] { new KeyValuePair&lt;string, int?>("a", 3), new KeyValuePair&lt;string, int?>("c", null) }, 2);
/// x.SubtractByKey(y).Collect();
///
/// [('b', 4), ('b', 5)]
@ -768,7 +770,7 @@ namespace Microsoft.Spark.CSharp.Core
/// searching the partition that the key maps to.
///
/// >>> l = range(1000)
/// >>> rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new <see cref="KeyValuePair{int, int}"/>(x, y)), 10)
/// >>> rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new KeyValuePair&lt;int, int>(x, y)), 10)
/// >>> rdd.lookup(42)
/// [42]
///
@ -917,20 +919,42 @@ namespace Microsoft.Spark.CSharp.Core
}
[Serializable]
private class AddShuffleKeyHelper<K1, V1>
internal class AddShuffleKeyHelper<K, V>
{
[NonSerialized]
private static MD5 md5 = MD5.Create();
public IEnumerable<byte[]> Execute(int split, IEnumerable<KeyValuePair<K1, V1>> input)
private MD5 md5 = MD5.Create();
private readonly int numPartitions;
private readonly Func<dynamic, int> partitionFunc = null;
public AddShuffleKeyHelper(int numPartitions, Func<dynamic, int> partitionFunc = null)
{
this.numPartitions = numPartitions;
this.partitionFunc = partitionFunc;
}
public IEnumerable<byte[]> Execute(int split, IEnumerable<KeyValuePair<K, V>> input)
{
// make sure that md5 is not null even if it is deserialized in the C# worker
if (md5 == null)
{
md5 = MD5.Create();
}
IFormatter formatter = new BinaryFormatter();
foreach (var kvp in input)
foreach (var kv in input)
{
var ms = new MemoryStream();
formatter.Serialize(ms, kvp.Key);
yield return md5.ComputeHash(ms.ToArray()).Take(8).ToArray();
if (partitionFunc == null)
{
formatter.Serialize(ms, kv.Key);
yield return md5.ComputeHash(ms.ToArray()).Take(8).ToArray();
}
else
{
long pid = (long)(partitionFunc(kv.Key) % numPartitions);
yield return SerDe.ToBytes(pid);
}
ms = new MemoryStream();
formatter.Serialize(ms, kvp);
formatter.Serialize(ms, kv);
yield return ms.ToArray();
}
}
@ -983,9 +1007,43 @@ namespace Microsoft.Spark.CSharp.Core
}
}
[Serializable]
internal class PartitionFuncDynamicTypeHelper<K>
{
private readonly Func<K, int> func;
internal PartitionFuncDynamicTypeHelper(Func<K, int> f)
{
this.func = f;
}
internal int Execute(dynamic input)
{
return func((K)input);
}
}
/// <summary>
/// Converts a collection to a list whose element type is Option(T).
/// If the collection is empty, returns a list containing a single empty Option(T).
/// </summary>
/// <param name="list">The collection to convert</param>
/// <typeparam name="T">The element type in the collection</typeparam>
/// <returns>A list with Option(T) as its element type</returns>
public static List<Option<T>> NullIfEmpty<T>(this IEnumerable<T> list)
{
return list.Any() ? list.Select(v => new Option<T>(v)).ToList() : new List<Option<T>>() { new Option<T>() };
}
private static long GenerateObjectId(object obj)
{
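// derive a stable 64-bit id from the MD5 hash of the binary-serialized object;
// the id is passed to CreatePairwiseRDD above to identify the partition function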
if (obj == null)
return 0;
MD5 md5 = MD5.Create();
IFormatter formatter = new BinaryFormatter();
var ms = new MemoryStream();
formatter.Serialize(ms, obj);
var hash = md5.ComputeHash(ms.ToArray());
return BitConverter.ToInt64(hash.Take(8).ToArray(), 0);
}
}
}

View file

@ -0,0 +1,62 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// An object that defines how the elements in a key-value pair RDD are partitioned by key.
/// Maps each key to a partition ID, from 0 to "numPartitions - 1".
/// </summary>
[Serializable]
public class Partitioner
{
private readonly int numPartitions;
private readonly Func<dynamic, int> partitionFunc;
/// <summary>
/// Create a <seealso cref="Partitioner"/> instance.
/// </summary>
/// <param name="numPartitions">Number of partitions.</param>
/// <param name="partitionFunc">Defines how the elements in a key-value pair RDD are partitioned by key. Input of Func is key, output is partition index.
/// Warning: different Func instances are considered different partitioners, which will cause a repartition.</param>
public Partitioner(int numPartitions, Func<dynamic, int> partitionFunc)
{
this.numPartitions = numPartitions;
this.partitionFunc = partitionFunc;
}
/// <summary>
/// Determines whether the specified object is equal to the current object.
/// </summary>
/// <returns>
/// true if the specified object is equal to the current object; otherwise, false.
/// </returns>
/// <param name="obj">The object to compare with the current object. </param>
public override bool Equals(object obj)
{
if (ReferenceEquals(null, obj)) return false;
if (ReferenceEquals(this, obj)) return true;
var otherPartitioner = obj as Partitioner;
if (otherPartitioner != null)
{
return otherPartitioner.numPartitions == numPartitions && otherPartitioner.partitionFunc == partitionFunc;
}
return base.Equals(obj);
}
/// <summary>
/// Serves as the default hash function.
/// </summary>
/// <returns>
/// A hash code for the current object.
/// </returns>
public override int GetHashCode()
{
return base.GetHashCode();
}
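// Illustrative sketch of the equality semantics above (hypothetical values): Equals compares
// numPartitions and the partitionFunc delegate, so only reusing the same Func instance keeps two
// Partitioner values equal; two separately written lambdas are treated as different partitioners.
Func<dynamic, int> byParity = key => (int)key % 2;
var p1 = new Partitioner(2, byParity);
var p2 = new Partitioner(2, byParity);            // p1.Equals(p2) == true
var p3 = new Partitioner(2, key => (int)key % 2); // p1.Equals(p3) == false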
}
}

View file

@ -19,7 +19,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <summary>
/// Wraps C#-based transformations that can be executed within a stage. It helps avoid unnecessary Ser/De of data between
/// JVM & CLR to execute C# transformations and pipelines them
/// JVM and CLR to execute C# transformations and pipelines them
/// </summary>
/// <typeparam name="U"></typeparam>
[Serializable]
@ -29,6 +29,14 @@ namespace Microsoft.Spark.CSharp.Core
internal bool preservesPartitioning;
//TODO - give generic types a better id
/// <summary>
/// Return a new RDD by applying a function to each partition of this RDD,
/// while tracking the index of the original partition.
/// </summary>
/// <typeparam name="U1">The element type</typeparam>
/// <param name="newFunc">The function to be applied to each partition</param>
/// <param name="preservesPartitioningParam">Indicates if it preserves partition parameters</param>
/// <returns>A new RDD</returns>
public override RDD<U1> MapPartitionsWithIndex<U1>(Func<int, IEnumerable<U>, IEnumerable<U1>> newFunc, bool preservesPartitioningParam = false)
{
if (IsPipelinable())

View file

@ -10,6 +10,9 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Core
{
//TODO - complete the impl
/// <summary>
/// A class that represents a profiler
/// </summary>
public class Profiler
{
}

View file

@ -5,6 +5,7 @@ using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Core
{
@ -18,16 +19,29 @@ namespace Microsoft.Spark.CSharp.Core
[Serializable]
public class RDD<T>
{
[NonSerialized]
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(RDD<T>));
internal IRDDProxy rddProxy;
internal IRDDProxy previousRddProxy;
// There should be only one SparkContext instance per application; mark it as NonSerialized to avoid creating more than one SparkContext instance.
// Need to set this field with a valid SparkContext instance after deserialization.
[NonSerialized]
internal SparkContext sparkContext;
internal SerializedMode serializedMode; //used for deserializing data before processing in C# worker
internal SerializedMode prevSerializedMode;
/// <summary>
/// Indicates whether the RDD is cached.
/// </summary>
protected bool isCached;
/// <summary>
/// Indicates whether the RDD is checkpointed.
/// </summary>
protected bool isCheckpointed;
internal bool bypassSerializer;
internal int? partitioner;
internal Partitioner partitioner;
internal virtual IRDDProxy RddProxy
{
@ -108,6 +122,7 @@ namespace Microsoft.Spark.CSharp.Core
public RDD<T> Cache()
{
isCached = true;
logger.LogInfo("Persisting RDD to default storage cache");
RddProxy.Cache();
return this;
}
@ -127,6 +142,7 @@ namespace Microsoft.Spark.CSharp.Core
public RDD<T> Persist(StorageLevelType storageLevelType)
{
isCached = true;
logger.LogInfo("Persisting RDD to storage level type {0}", storageLevelType);
RddProxy.Persist(storageLevelType);
return this;
}
@ -140,6 +156,7 @@ namespace Microsoft.Spark.CSharp.Core
if (isCached)
{
isCached = false;
logger.LogInfo("Unpersisting RDD from the cache");
RddProxy.Unpersist();
}
return this;
@ -156,10 +173,15 @@ namespace Microsoft.Spark.CSharp.Core
public void Checkpoint()
{
isCheckpointed = true;
logger.LogInfo("Checkpointing RDD to SparkContext.SetCheckpointDir");
RddProxy.Checkpoint();
}
internal int GetNumPartitions()
/// <summary>
/// Returns the number of partitions of this RDD.
/// </summary>
/// <returns>The number of partitions of this RDD</returns>
public int GetNumPartitions()
{
return RddProxy.GetNumPartitions();
}
@ -167,7 +189,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <summary>
/// Return a new RDD by applying a function to each element of this RDD.
///
/// sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new <see cref="KeyValuePair{string, int}"/>(x, 1)).Collect()
/// sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new KeyValuePair&lt;string, int>(x, 1)).Collect()
/// [('a', 1), ('b', 1), ('c', 1)]
///
/// </summary>
@ -177,6 +199,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <returns></returns>
public RDD<U> Map<U>(Func<T, U> f, bool preservesPartitioning = false)
{
logger.LogInfo("Executing Map operation on RDD (preservesPartitioning={0})", preservesPartitioning);
return MapPartitionsWithIndex(new MapHelper<T, U>(f).Execute, preservesPartitioning);
}
@ -217,7 +240,7 @@ namespace Microsoft.Spark.CSharp.Core
/// Return a new RDD by applying a function to each partition of this RDD,
/// while tracking the index of the original partition.
///
/// <see cref="sc.Parallelize(new int[]{1, 2, 3, 4}, 4).MapPartitionsWithIndex{double}"/>((pid, iter) => (double)pid).Sum()
/// sc.Parallelize(new int[]{1, 2, 3, 4}, 4).MapPartitionsWithIndex&lt;double>((pid, iter) => (double)pid).Sum()
/// 6
/// </summary>
/// <typeparam name="U"></typeparam>
@ -417,7 +440,7 @@ namespace Microsoft.Spark.CSharp.Core
public RDD<T> Union(RDD<T> other)
{
var rdd = new RDD<T>(RddProxy.Union(other.RddProxy), sparkContext);
if (partitioner == other.partitioner && RddProxy.PartitionLength() == rdd.RddProxy.PartitionLength())
if (partitioner == other.partitioner && RddProxy.GetNumPartitions() == rdd.RddProxy.GetNumPartitions())
rdd.partitioner = partitioner;
return rdd;
}
@ -579,6 +602,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <returns></returns>
public T Reduce(Func<T, T, T> f)
{
logger.LogInfo("Executing Reduce operation on RDD");
Func<int, IEnumerable<T>, IEnumerable<T>> func = new ReduceHelper<T>(f).Execute;
var vals = MapPartitionsWithIndex(func, true).Collect();
@ -1047,6 +1071,14 @@ namespace Microsoft.Spark.CSharp.Core
{
return new RDD<T>(RddProxy.RandomSampleWithRange(lb, ub, seed), sparkContext);
}
internal int GetDefaultPartitionNum()
{
var numPartitions = sparkContext.SparkConf.SparkConfProxy.GetInt("spark.default.parallelism", 0);
if (numPartitions == 0 && previousRddProxy != null)
numPartitions = previousRddProxy.GetNumPartitions();
return numPartitions;
}
}
/// <summary>
@ -1115,10 +1147,12 @@ namespace Microsoft.Spark.CSharp.Core
/// <typeparam name="T"></typeparam>
/// <param name="self"></param>
/// <param name="num"></param>
/// <param name="keyFunc"></param>
/// <returns></returns>
public static T[] TakeOrdered<T>(this RDD<T> self, int num) where T : IComparable<T>
public static T[] TakeOrdered<T>(this RDD<T> self, int num, Func<T, dynamic> keyFunc = null) where T : IComparable<T>
{
return self.MapPartitionsWithIndex<T>(new TakeOrderedHelper<T>(num).Execute).Collect().OrderBy(x => x).Take(num).ToArray();
return self.MapPartitionsWithIndex<T>(new TakeOrderedHelper<T>(num, keyFunc).Execute).Collect()
.OrderBy(x => keyFunc == null ? x : keyFunc(x)).Take(num).ToArray();
}
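// Illustrative usage sketch of the keyFunc parameter added above (hypothetical driver-side
// snippet; assumes a SparkContext named sc as in the XML doc samples in this file): take the two
// smallest elements by absolute value instead of by natural ordering.
var smallestByAbsoluteValue = sc.Parallelize(new[] { -7, 3, -1, 5 }, 2)
    .TakeOrdered(2, x => Math.Abs(x)); // [-1, 3]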
/// <summary>
@ -1432,13 +1466,15 @@ namespace Microsoft.Spark.CSharp.Core
internal class TakeOrderedHelper<T>
{
private readonly int num;
internal TakeOrderedHelper(int num)
private readonly Func<T, dynamic> keyFunc;
internal TakeOrderedHelper(int num, Func<T, dynamic> keyFunc)
{
this.num = num;
this.keyFunc = keyFunc;
}
internal IEnumerable<T> Execute(int pid, IEnumerable<T> input)
{
return input.OrderBy(x => x).Take(num);
return input.OrderBy(x => keyFunc == null ? x : keyFunc(x)).Take(num);
}
}
[Serializable]

View file

@ -5,12 +5,12 @@ using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Net.Sockets;
using System.Reflection;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.Text;
using Microsoft.Spark.CSharp.Interop.Ipc;
using Microsoft.Spark.CSharp.Network;
using Microsoft.Spark.CSharp.Sql;
namespace Microsoft.Spark.CSharp.Core
@ -23,10 +23,10 @@ namespace Microsoft.Spark.CSharp.Core
public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
{
IFormatter formatter = new BinaryFormatter();
Socket sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
var sock = SocketFactory.CreateSocket();
sock.Connect(IPAddress.Loopback, port);
using (NetworkStream s = new NetworkStream(sock))
using (var s = sock.GetStream())
{
byte[] buffer;
while ((buffer = SerDe.ReadBytes(s)) != null && buffer.Length > 0)

View file

@ -63,6 +63,7 @@ namespace Microsoft.Spark.CSharp.Core
public SparkConf SetMaster(string master)
{
sparkConfProxy.SetMaster(master);
logger.LogInfo("Spark master set to {0}", master);
return this;
}
@ -73,6 +74,7 @@ namespace Microsoft.Spark.CSharp.Core
public SparkConf SetAppName(string appName)
{
sparkConfProxy.SetAppName(appName);
logger.LogInfo("Spark app name set to {0}", appName);
return this;
}
@ -84,6 +86,7 @@ namespace Microsoft.Spark.CSharp.Core
public SparkConf SetSparkHome(string sparkHome)
{
sparkConfProxy.SetSparkHome(sparkHome);
logger.LogInfo("Spark home set to {0}", sparkHome);
return this;
}
@ -95,6 +98,7 @@ namespace Microsoft.Spark.CSharp.Core
public SparkConf Set(string key, string value)
{
sparkConfProxy.Set(key, value);
logger.LogInfo("Spark configuration key-value set to {0}={1}", key, value);
return this;
}

View file

@ -10,14 +10,31 @@ using System.Text;
using Microsoft.Spark.CSharp.Interop;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// Main entry point for Spark functionality. A SparkContext represents the
/// connection to a Spark cluster, and can be used to create RDDs, accumulators
/// and broadcast variables on that cluster.
/// </summary>
public class SparkContext
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkContext));
internal ISparkContextProxy SparkContextProxy { get; private set; }
internal SparkConf SparkConf { get; private set; }
private static SparkContext _activeSparkContext = null;
/// <summary>
/// Get existing SparkContext
/// </summary>
internal static SparkContext GetActiveSparkContext()
{
return _activeSparkContext;
}
private AccumulatorServer accumulatorServer;
private int nextAccumulatorId;
@ -63,20 +80,32 @@ namespace Microsoft.Spark.CSharp.Core
/// </summary>
public StatusTracker StatusTracker { get { return new StatusTracker(SparkContextProxy.StatusTracker); } }
/// <summary>
/// Initializes a SparkContext instance with a specific master, application name, and spark home
/// </summary>
/// <param name="master">Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local)</param>
/// <param name="appName">A name for your application, to display on the cluster web UI</param>
/// <param name="sparkHome">the path that holds spark bits</param>
public SparkContext(string master, string appName, string sparkHome)
: this(master, appName, sparkHome, null)
{
}
{}
/// <summary>
/// Initializes a SparkContext instance with a specific master and application name.
/// </summary>
/// <param name="master"></param>
/// <param name="appName"></param>
public SparkContext(string master, string appName)
: this(master, appName, null, null)
{
}
{}
/// <summary>
/// Initializes a SparkContext instance with a specific spark config.
/// </summary>
/// <param name="conf">A SparkConf object that represents the settings for spark</param>
public SparkContext(SparkConf conf)
: this(null, null, null, conf)
{
}
{}
/// <summary>
/// when created from checkpoint
@ -100,6 +129,7 @@ namespace Microsoft.Spark.CSharp.Core
SparkConf.SetSparkHome(sparkHome);
SparkContextProxy = SparkCLREnvironment.SparkCLRProxy.CreateSparkContext(SparkConf.SparkConfProxy);
_activeSparkContext = this;
}
internal void StartAccumulatorServer()
@ -112,8 +142,15 @@ namespace Microsoft.Spark.CSharp.Core
}
}
/// <summary>
/// Read a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings.
/// </summary>
/// <param name="filePath">The path of file to be read</param>
/// <param name="minPartitions">A suggestion value of the minimal splitting number for input data</param>
/// <returns>an RDD of Strings</returns>
public RDD<string> TextFile(string filePath, int minPartitions = 0)
{
logger.LogInfo("Reading text file {0} as RDD<string> with {1} partitions", filePath, minPartitions);
return new RDD<string>(SparkContextProxy.TextFile(filePath, minPartitions), this, SerializedMode.String);
}
@ -142,6 +179,7 @@ namespace Microsoft.Spark.CSharp.Core
if (numSlices < 1)
numSlices = 1;
logger.LogInfo("Parallelizing {0} items to form RDD in the cluster with {1} partitions", collectionOfByteRepresentationOfObjects.Count, numSlices);
return new RDD<T>(SparkContextProxy.Parallelize(collectionOfByteRepresentationOfObjects, numSlices), this);
}
@ -170,7 +208,7 @@ namespace Microsoft.Spark.CSharp.Core
///
/// Do
/// {{{
/// <see cref="RDD{KeyValuePair{string, string}}"/> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
/// RDD&lt;KeyValuePair&lt;string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
/// }}}
///
/// then `rdd` contains
@ -208,7 +246,7 @@ namespace Microsoft.Spark.CSharp.Core
/// }}}
///
/// Do
/// <see cref="RDD{KeyValuePair{string, byte[]}}"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
/// RDD&lt;KeyValuePair&lt;string, byte[]>> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
///
/// then `rdd` contains
/// {{{
@ -401,9 +439,16 @@ namespace Microsoft.Spark.CSharp.Core
/// </summary>
public void Stop()
{
logger.LogInfo("Stopping SparkContext");
logger.LogInfo("Note that there might be error in Spark logs on the failure to delete userFiles directory " +
"under Spark temp directory (spark.local.dir config value in local mode)");
logger.LogInfo("This error may be ignored for now. See https://issues.apache.org/jira/browse/SPARK-8333 for details");
if (accumulatorServer != null)
accumulatorServer.Shutdown();
SparkContextProxy.Stop();
}
/// <summary>

View file

@ -9,6 +9,11 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// A class for tracking the statistics of a set of numbers (count, mean and variance) in a numerically
/// robust way. Includes support for merging two StatCounters. Based on Welford and Chan's algorithms
/// for running variance.
/// </summary>
[Serializable]
public class StatCounter
{
@ -18,9 +23,16 @@ namespace Microsoft.Spark.CSharp.Core
private double maxValue = double.MinValue; // Running max of our values
private double minValue = double.MaxValue; // Running min of our values
/// <summary>
/// Initializes the StatCounter with no values.
/// </summary>
public StatCounter()
{ }
/// <summary>
/// Initializes the StatCounter with the given values.
/// </summary>
/// <param name="values"></param>
public StatCounter(IEnumerable<double> values)
{
Merge(values);
@ -114,10 +126,30 @@ namespace Microsoft.Spark.CSharp.Core
other.minValue = minValue;
return other;
}
/// <summary>
/// Gets the count of values in this StatCounter
/// </summary>
public long Count { get { return n; } }
/// <summary>
/// Gets the mean of the values in this StatCounter
/// </summary>
public double Mean { get { return mu; } }
/// <summary>
/// Gets the sum of the values in this StatCounter
/// </summary>
public double Sum { get { return n * mu; } }
/// <summary>
/// Gets the maximum of the values in this StatCounter
/// </summary>
public double Max { get { return maxValue; } }
/// <summary>
/// Gets the minimum of the values in this StatCounter
/// </summary>
public double Min { get { return minValue; } }
/// <summary>
@ -139,6 +171,13 @@ namespace Microsoft.Spark.CSharp.Core
/// Return the sample standard deviation of the values, which corrects for bias in estimating the variance by dividing by N-1 instead of N.
/// </summary>
public double SampleStdev { get { return Math.Sqrt(SampleVariance); } }
/// <summary>
/// Returns a string that represents this StatCounter.
/// </summary>
/// <returns>
/// A string that represents this StatCounter.
/// </returns>
public override string ToString()
{
return string.Format("(count: {0}, mean: {1}, stdev: {2}, max: {3}, min: {4})", Count, Mean, Stdev, Max, Min);
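// Illustrative usage sketch based on the members above (hypothetical values):
var stats = new StatCounter(new[] { 1.0, 2.0, 3.0, 4.0 });
// stats.Count == 4, stats.Mean == 2.5, stats.Sum == 10.0, stats.Min == 1.0, stats.Max == 4.0
// stats.Stdev is the population standard deviation (about 1.118); stats.SampleStdev divides by N-1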

View file

@ -11,6 +11,9 @@ using Microsoft.Spark.CSharp.Proxy;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// Low-level status reporting APIs for monitoring job and stage progress.
/// </summary>
public class StatusTracker
{
private readonly IStatusTrackerProxy statusTrackerProxy;
@ -76,11 +79,21 @@ namespace Microsoft.Spark.CSharp.Core
}
}
/// <summary>
/// SparkJobInfo represents the information of a Spark job
/// </summary>
public class SparkJobInfo
{
readonly int jobId;
readonly int[] stageIds;
readonly string status;
/// <summary>
/// Initializes a SparkJobInfo instance with a given job Id, stage Ids, and status
/// </summary>
/// <param name="jobId"></param>
/// <param name="stageIds"></param>
/// <param name="status"></param>
public SparkJobInfo(int jobId, int[] stageIds, string status)
{
this.jobId = jobId;
@ -88,12 +101,26 @@ namespace Microsoft.Spark.CSharp.Core
this.status = status;
}
/// <summary>
/// Gets the Id of this Spark job
/// </summary>
public int JobId { get { return jobId; } }
/// <summary>
/// Gets the stage Ids of this Spark job
/// </summary>
public int[] StageIds { get { return stageIds; } }
/// <summary>
/// Gets the status of this Spark job
/// </summary>
public string Status { get { return status; } }
}
/// <summary>
/// SparkStageInfo represents the information of a Spark stage
/// </summary>
public class SparkStageInfo
{
readonly int stageId;
@ -104,6 +131,18 @@ namespace Microsoft.Spark.CSharp.Core
readonly int numActiveTasks;
readonly int numCompletedTasks;
readonly int numFailedTasks;
/// <summary>
/// Initializes a SparkStageInfo instance with given values
/// </summary>
/// <param name="stageId">The stage Id</param>
/// <param name="currentAttemptId">The current attempt Id</param>
/// <param name="submissionTime">The submission time</param>
/// <param name="name">The name of this stage</param>
/// <param name="numTasks">The number of tasks</param>
/// <param name="numActiveTasks">The number of active tasks</param>
/// <param name="numCompletedTasks">The number of completed tasks</param>
/// <param name="numFailedTasks">The number of failed tasks</param>
public SparkStageInfo(int stageId, int currentAttemptId, long submissionTime, string name, int numTasks, int numActiveTasks, int numCompletedTasks, int numFailedTasks)
{
this.stageId = stageId;
@ -116,13 +155,44 @@ namespace Microsoft.Spark.CSharp.Core
this.numFailedTasks = numFailedTasks;
}
/// <summary>
/// Gets the stage Id of this SparkStageInfo
/// </summary>
public int StageId { get { return stageId; } }
/// <summary>
/// Gets the current attempt Id of this SparkStageInfo
/// </summary>
public int CurrentAttemptId { get { return currentAttemptId; } }
/// <summary>
/// Gets the submission time of this SparkStageInfo
/// </summary>
public long SubmissionTime { get { return submissionTime; } }
/// <summary>
/// Gets the name of this SparkStageInfo
/// </summary>
public string Name { get { return name; } }
/// <summary>
/// Gets the number of tasks of this SparkStageInfo
/// </summary>
public int NumTasks { get { return numTasks; } }
/// <summary>
/// Gets the number of active tasks of this SparkStageInfo
/// </summary>
public int NumActiveTasks { get { return numActiveTasks; } }
/// <summary>
/// Gets the number of completed tasks of this SparkStageInfo
/// </summary>
public int NumCompletedTasks { get { return numCompletedTasks; } }
/// <summary>
/// Gets the number of failed tasks of this SparkStageInfo
/// </summary>
public int NumFailedTasks { get { return numFailedTasks; } }
}
}

View file

@ -9,21 +9,67 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// Defines the type of storage levels
/// </summary>
public enum StorageLevelType
{
/// <summary>
/// Specifies not to use any storage
/// </summary>
NONE,
/// <summary>
/// Specifies to use disk only
/// </summary>
DISK_ONLY,
/// <summary>
/// Specifies to use disk only with 2 replicas
/// </summary>
DISK_ONLY_2,
/// <summary>
/// Specifies to use memory only
/// </summary>
MEMORY_ONLY,
/// <summary>
/// Specifies to use memory only with 2 replicas
/// </summary>
MEMORY_ONLY_2,
/// <summary>
/// Specifies to use memory only in a serialized format
/// </summary>
MEMORY_ONLY_SER,
/// <summary>
/// Specifies to use memory only in a serialized format with 2 replicas
/// </summary>
MEMORY_ONLY_SER_2,
/// <summary>
/// Specifies to use disk and memory
/// </summary>
MEMORY_AND_DISK,
/// <summary>
/// Specifies to use disk and memory with 2 replicas
/// </summary>
MEMORY_AND_DISK_2,
/// <summary>
/// Specifies to use disk and memory in a serialized format
/// </summary>
MEMORY_AND_DISK_SER,
/// <summary>
/// Specifies to use disk and memory in a serialized format with 2 replicas
/// </summary>
MEMORY_AND_DISK_SER_2,
/// <summary>
/// Specifies to use off-heap storage
/// </summary>
OFF_HEAP
}
/// <summary>
/// Flags for controlling the storage of an RDD. Each StorageLevel records whether to use
/// memory, whether to drop the RDD to disk if it falls out of memory, whether to keep the
/// data in memory in a serialized format, and whether to replicate the RDD partitions
/// on multiple nodes.
/// </summary>
public class StorageLevel
{
internal static Dictionary<StorageLevelType, StorageLevel> storageLevel = new Dictionary<StorageLevelType, StorageLevel>
@ -56,6 +102,10 @@ namespace Microsoft.Spark.CSharp.Core
this.replication = replication;
}
/// <summary>
/// Returns a readable string that represents the type
/// </summary>
/// <returns>A readable string</returns>
public override string ToString()
{
return string.Format("{0}{1}{2}{3}{4} Replicated",

View file

@ -10,7 +10,7 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Interop.Ipc
{
/// <summary>
/// Behavior of the bridge used for the IPC interop between JVM & CLR
/// Behavior of the bridge used for the IPC interop between JVM and CLR
/// </summary>
internal interface IJvmBridge : IDisposable
{

View file

@ -5,23 +5,24 @@ using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Net;
using System.Net.Sockets;
using System.Text;
using Microsoft.Spark.CSharp.Network;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Interop.Ipc
{
/// <summary>
/// Implementation of thread safe IPC bridge between JVM & CLR
/// throught a concourrent socket connection queue (lightweight synchronisation mechanism)
/// Implementation of thread safe IPC bridge between JVM and CLR
/// Using a concurrent socket connection queue (lightweight synchronization mechanism)
/// supporting async JVM calls like StreamingContext.AwaitTermination()
/// </summary>
[ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not required
internal class JvmBridge : IJvmBridge
{
private int portNumber;
private readonly ConcurrentQueue<Socket> sockets = new ConcurrentQueue<Socket>();
private readonly ConcurrentQueue<ISocketWrapper> sockets = new ConcurrentQueue<ISocketWrapper>();
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(JvmBridge));
public void Initialize(int portNumber)
@ -29,12 +30,12 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
this.portNumber = portNumber;
}
private Socket GetConnection()
private ISocketWrapper GetConnection()
{
Socket socket;
ISocketWrapper socket;
if (!sockets.TryDequeue(out socket))
{
socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
socket = SocketFactory.CreateSocket();
socket.Connect(IPAddress.Loopback, portNumber);
}
return socket;
@ -72,8 +73,8 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
{
var overallPayload = PayloadHelper.BuildPayload(isStatic, classNameOrJvmObjectReference, methodName, parameters);
Socket socket = GetConnection();
using (NetworkStream s = new NetworkStream(socket))
var socket = GetConnection();
using (var s = socket.GetStream())
{
SerDe.Write(s, overallPayload);
@ -115,7 +116,7 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
break;
case 'l':
returnValue = ReadJvmObjectReferenceCollection(s);
returnValue = ReadCollection(s);
break;
@ -207,15 +208,56 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return paramsString.ToString();
}
private object ReadJvmObjectReferenceCollection(NetworkStream s)
private object ReadCollection(Stream s)
{
object returnValue;
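// wire format of a collection: one byte for the element type ('c' string, 'i' int, 'd' double,
// 'b' bool, 'r' byte[], 'j' JVM object reference), then an int count, then the elements themselves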
var listItemTypeAsChar = Convert.ToChar(s.ReadByte());
int numOfItemsInList = SerDe.ReadInt(s);
switch (listItemTypeAsChar)
{
case 'c':
var strList = new List<string>();
for (int itemIndex = 0; itemIndex < numOfItemsInList; itemIndex++)
{
strList.Add(SerDe.ReadString(s));
}
returnValue = strList;
break;
case 'i':
var intList = new List<int>();
for (int itemIndex = 0; itemIndex < numOfItemsInList; itemIndex++)
{
intList.Add(SerDe.ReadInt(s));
}
returnValue = intList;
break;
case 'd':
var doubleList = new List<double>();
for (int itemIndex = 0; itemIndex < numOfItemsInList; itemIndex++)
{
doubleList.Add(SerDe.ReadDouble(s));
}
returnValue = doubleList;
break;
case 'b':
var boolList = new List<bool>();
for (int itemIndex = 0; itemIndex < numOfItemsInList; itemIndex++)
{
boolList.Add(Convert.ToBoolean(s.ReadByte()));
}
returnValue = boolList;
break;
case 'r':
var byteArrayList = new List<byte[]>();
for (int itemIndex = 0; itemIndex < numOfItemsInList; itemIndex++)
{
var byteArrayLen = SerDe.ReadInt(s);
byteArrayList.Add(SerDe.ReadBytes(s, byteArrayLen));
}
returnValue = byteArrayList;
break;
case 'j':
var jvmObjectReferenceList = new List<JvmObjectReference>();
var numOfItemsInList = SerDe.ReadInt(s);
for (int itemIndex = 0; itemIndex < numOfItemsInList; itemIndex++)
{
var itemIdentifier = SerDe.ReadString(s);
@ -223,7 +265,6 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
}
returnValue = jvmObjectReferenceList;
break;
default:
// convert listItemTypeAsChar to UInt32 because the char may be non-printable
throw new NotSupportedException(
@ -235,13 +276,12 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
public void Dispose()
{
Socket socket;
ISocketWrapper socket;
while (sockets.TryDequeue(out socket))
{
if (socket != null)
{
socket.Dispose();
socket = null;
}
}
}

View file

@ -0,0 +1,80 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Spark.CSharp.Proxy.Ipc;
namespace Microsoft.Spark.CSharp.Interop.Ipc
{
/// <summary>
/// Utility methods for C#-JVM interaction
/// </summary>
[ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not required
internal static class JvmBridgeUtils
{
public static JvmObjectReference GetJavaMap<K, V>(IEnumerable<KeyValuePair<K, V>> enumerable)
{
var jmap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Key, item.Value });
}
return jmap;
}
public static JvmObjectReference GetJavaHashMap<K, V>(IEnumerable<KeyValuePair<K, V>> enumerable)
{
var jmap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.HashMap", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Key, item.Value });
}
return jmap;
}
public static JvmObjectReference GetScalaMutableMap<K, V>(Dictionary<K, V> mapValues)
{
var hashMapReference = GetJavaHashMap(mapValues.Select(kvp => kvp));
return new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.JvmBridgeUtils", "toMutableMap", new object[] { hashMapReference }).ToString());
}
public static JvmObjectReference GetJavaSet<T>(IEnumerable<T> enumerable)
{
var jset = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.HashSet", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jset, "add", new object[] { item });
}
return jset;
}
public static JvmObjectReference GetJavaList<T>(IEnumerable<T> enumerable)
{
var jlist = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jlist, "add", new object[] { item });
}
return jlist;
}
public static JvmObjectReference GetJavaSeq<T>(IEnumerable<T> enumerable)
{
return new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "toSeq", GetJavaList<T>(enumerable)));
}
public static JvmObjectReference GetJavaDuration(int durationSeconds)
{
// Java expects Duration in milliseconds and the value must be of long type
return SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { (long)durationSeconds * 1000 });
}
}
}

View file

@ -2,8 +2,10 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Runtime.CompilerServices;
using Microsoft.Spark.CSharp.Proxy.Ipc;
[assembly: InternalsVisibleTo("Microsoft.Spark.CSharp.Utils")]
namespace Microsoft.Spark.CSharp.Interop.Ipc
{
/// <summary>
@ -19,6 +21,7 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
{
Id = jvmReferenceId;
creationTime = DateTime.UtcNow;
SparkCLREnvironment.WeakObjectManager.AddWeakRefereceObject(this);
}
public override string ToString()
@ -40,6 +43,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return base.Equals(obj);
}
public override int GetHashCode()
{
return base.GetHashCode();
}
public string GetDebugInfo()
{
var javaObjectReferenceForClassObject = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "getClass").ToString());

View file

@ -12,37 +12,84 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
/// </summary>
public enum SpecialLengths : int
{
/// <summary>
/// Flag to indicate the end of data section
/// </summary>
END_OF_DATA_SECTION = -1,
/// <summary>
/// Flag to indicate an exception thrown from .NET side
/// </summary>
DOTNET_EXCEPTION_THROWN = -2,
/// <summary>
/// Flag to indicate timing data
/// </summary>
TIMING_DATA = -3,
/// <summary>
/// Flag to indicate the end of stream
/// </summary>
END_OF_STREAM = -4,
/// <summary>
/// Flag to indicate a null (undefined) value
/// </summary>
NULL = -5,
}
/// <summary>
/// Serialization and Deserialization of data types between JVM & CLR
/// Serialization and Deserialization of data types between JVM and CLR
/// </summary>
public class SerDe //TODO - add ToBytes() for other types
{
/// <summary>
/// The total number of bytes read
/// </summary>
public static long totalReadNum = 0;
/// <summary>
/// The total number of bytes written
/// </summary>
public static long totalWriteNum = 0;
/// <summary>
/// Converts a boolean to a byte array
/// </summary>
/// <param name="value">The boolean to be converted</param>
/// <returns>The byte array converted from a boolean</returns>
public static byte[] ToBytes(bool value)
{
return new[] { System.Convert.ToByte(value) };
}
/// <summary>
/// Converts a string to a byte array.
/// </summary>
/// <param name="value">The string to be converted</param>
/// <returns>The byte array converted from a string</returns>
public static byte[] ToBytes(string value)
{
return Encoding.UTF8.GetBytes(value);
}
/// <summary>
/// Converts an integer to a byte array
/// </summary>
/// <param name="value">The intger to be converted</param>
/// <returns>The byte array converted from an integer</returns>
public static byte[] ToBytes(int value)
{
var byteRepresentationofInputLength = BitConverter.GetBytes(value);
Array.Reverse(byteRepresentationofInputLength);
return byteRepresentationofInputLength;
}
/// <summary>
/// Converts a long integer to a byte array
/// </summary>
/// <param name="value">The long intger to be converted</param>
/// <returns>The byte array converted from a long integer</returns>
public static byte[] ToBytes(long value)
{
var byteRepresentationofInputLength = BitConverter.GetBytes(value);
@ -50,6 +97,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return byteRepresentationofInputLength;
}
/// <summary>
/// Converts a double to a byte array
/// </summary>
/// <param name="value">The double to be converted</param>
/// <returns>The byte array converted from a double</returns>
public static byte[] ToBytes(double value)
{
var byteRepresentationofInputLength = BitConverter.GetBytes(value);
@ -57,16 +109,31 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return byteRepresentationofInputLength;
}
/// <summary>
/// Converts a byte to a character
/// </summary>
/// <param name="value">The byte to be converted</param>
/// <returns>The char converted from a byte</returns>
public static char ToChar(byte value)
{
return System.Convert.ToChar(value);
}
/// <summary>
/// Converts a byte array to a string
/// </summary>
/// <param name="value">The byte array to be converted</param>
/// <returns>The string converted from a byte array</returns>
public static string ToString(byte[] value)
{
return Encoding.UTF8.GetString(value);
}
/// <summary>
/// Converts a byte array to an integer
/// </summary>
/// <param name="value">The byte array to be converted</param>
/// <returns>The integer converted from a byte array</returns>
public static int ToInt(byte[] value)
{
return //Netty byte order is BigEndian
@ -76,11 +143,21 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
(int)value[0] << 24;
}
/// <summary>
/// Reads an integer from a stream
/// </summary>
/// <param name="s">The stream to be read</param>
/// <returns>The integer read from stream</returns>
public static int ReadInt(Stream s)
{
return ToInt(ReadBytes(s, 4));
}
}
/// <summary>
/// Reads a long integer from a stream
/// </summary>
/// <param name="s">The stream to be read</param>
/// <returns>The long integer read from stream</returns>
public static long ReadLong(Stream s)
{
byte[] buffer = ReadBytes(s, 8);
@ -94,7 +171,12 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
(long)buffer[1] << 48 |
(long)buffer[0] << 56;
}
/// <summary>
/// Reads a double from a stream
/// </summary>
/// <param name="s">The stream to be read</param>
/// <returns>The double read from stream</returns>
public static double ReadDouble(Stream s)
{
byte[] buffer = ReadBytes(s, 8);
@ -102,11 +184,24 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return BitConverter.ToDouble(buffer, 0);
}
/// <summary>
/// Reads a string from a stream
/// </summary>
/// <param name="s">The stream to be read</param>
/// <returns>The string read from stream</returns>
public static string ReadString(Stream s)
{
return ToString(ReadBytes(s));
}
/// <summary>
/// Reads a byte array with a given length from a stream
/// </summary>
/// <param name="s">The stream to be read</param>
/// <param name="length">The length to be read</param>
/// <returns>The byte array read from the stream</returns>
/// <exception cref="ArgumentOutOfRangeException">An ArgumentOutOfRangeException thrown if the given length is negative</exception>
/// <exception cref="ArgumentException">An ArgumentException if the actual read length is less than the given length</exception>
public static byte[] ReadBytes(Stream s, int length)
{
if (length < 0)
@ -139,6 +234,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return buffer;
}
/// <summary>
/// Reads a byte array from a stream. The first 4 bytes indicate the length of a byte array.
/// </summary>
/// <param name="s">The stream to be read</param>
/// <returns>The byte array read from stream</returns>
public static byte[] ReadBytes(Stream s)
{
var lengthBuffer = ReadBytes(s, 4);
@ -152,6 +252,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return ReadBytes(s, length);
}
/// <summary>
/// Reads an object Id from a stream.
/// </summary>
/// <param name="s">The stream to be read</param>
/// <returns>The object Id read from stream</returns>
public static string ReadObjectId(Stream s)
{
var type = s.ReadByte();
@ -168,18 +273,33 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
return ReadString(s);
}
/// <summary>
/// Writes a byte to a stream
/// </summary>
/// <param name="s">The stream to write</param>
/// <param name="value">The byte to write</param>
public static void Write(Stream s, byte value)
{
s.WriteByte(value);
totalWriteNum += 1;
}
/// <summary>
/// Writes a byte array to a stream
/// </summary>
/// <param name="s">The stream to write</param>
/// <param name="value">The byte array to write</param>
public static void Write(Stream s, byte[] value)
{
s.Write(value, 0, value.Length);
totalWriteNum += value.Length;
}
/// <summary>
/// Writes an integer to a stream
/// </summary>
/// <param name="s">The stream to write</param>
/// <param name="value">The integer to write</param>
public static void Write(Stream s, int value)
{
Write(s, new byte[] {
@ -190,6 +310,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
});
}
/// <summary>
/// Writes a long integer to a stream
/// </summary>
/// <param name="s">The stream to write</param>
/// <param name="value">The long integer to write</param>
public static void Write(Stream s, long value)
{
Write(s, new byte[] {
@ -204,6 +329,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
});
}
/// <summary>
/// Writes a double to a stream
/// </summary>
/// <param name="s">The stream to write</param>
/// <param name="value">The double to write</param>
public static void Write(Stream s, double value)
{
byte[] buffer = BitConverter.GetBytes(value);
@ -211,6 +341,11 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc
Write(s, buffer);
}
/// <summary>
/// Writes a string to a stream
/// </summary>
/// <param name="s">The stream to write</param>
/// <param name="value">The string to write</param>
public static void Write(Stream s, string value)
{
byte[] buffer = Encoding.UTF8.GetBytes(value);

Просмотреть файл

@ -0,0 +1,261 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Spark.CSharp.Proxy.Ipc;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Interop.Ipc
{
using WeakReferenceObjectIdPair = KeyValuePair<WeakReference, string>;
/// <summary>
/// Releases JVMObjectTracker object references.
/// Background on the C# to Java interop:
/// 1. Java side: https://github.com/Microsoft/Mobius/blob/master/scala/src/main/org/apache/spark/api/csharp/CSharpBackendHandler.scala#L269
///    JVMObjectTracker keeps a HashMap[String, Object] of [id, Java object].
/// 2. C# side:
///    1) JvmObjectReference remembers the id: https://github.com/Microsoft/Mobius/blob/master/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs#L20
///    2) So JvmBridge can call the Java object's methods: https://github.com/Microsoft/Mobius/blob/master/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs#L69
///
/// A potential memory leak can therefore build up in JVMObjectTracker.
/// To address this, garbage collection is tracked on the C# side; once a reference is collected, its id is used to remove the corresponding entry from JVMObjectTracker's HashMap.
/// </summary>
internal interface IWeakObjectManager : IDisposable
{
TimeSpan CheckInterval { get; set; }
void AddWeakRefereceObject(JvmObjectReference obj);
/// <summary>
/// Gets the count of all weak objects, including dead objects that are waiting to be released.
/// </summary>
int GetReferencesCount();
/// <summary>
/// Gets the count of weak objects that are still alive
/// </summary>
/// <returns></returns>
int GetAliveCount();
}
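// Editorial note (illustrative sketch, not part of this commit): a JvmObjectReference is registered
// with the manager when it is created; after the CLR object is garbage collected, the background
// thread asks the JVM to drop the tracked id (via the SparkCLRHandler "rm" call shown below).
// Assuming the manager is exposed through the SparkCLREnvironment.WeakObjectManager property added
// in this commit, and "42" is an id handed out by the JVM-side JVMObjectTracker:
//
//     var objRef = new JvmObjectReference("42");
//     SparkCLREnvironment.WeakObjectManager.AddWeakRefereceObject(objRef);
//     // once objRef becomes unreachable and a check interval elapses,
//     // the JVM-side HashMap entry for "42" is released.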
/// <summary>
/// Adaptively controls the number of weak objects that should be checked in each interval
/// </summary>
internal class WeakReferenceCheckCountController
{
private static readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(WeakReferenceCheckCountController));
private int checkCount;
private int referencesCountBenchmark;
public WeakReferenceCheckCountController(int initialCheckCount, int initialReferencesCountBenchmark)
{
checkCount = initialCheckCount;
referencesCountBenchmark = initialReferencesCountBenchmark;
}
/// <summary>
/// Adjusts checkCount adaptively according to the current weak reference object count
/// </summary>
public int AdjustCheckCount(int currentReferenceCount)
{
if (currentReferenceCount > (referencesCountBenchmark + referencesCountBenchmark / 2))
{
int previousCheckCount = checkCount;
int previousReferencesCountBenchmark = referencesCountBenchmark;
checkCount *= 2;
referencesCountBenchmark = referencesCountBenchmark + referencesCountBenchmark / 2;
logger.LogInfo("Adjust checkCount from {0} to {1}, referencesCountBenchmark from {2} to {3}",
previousCheckCount, checkCount, previousReferencesCountBenchmark, referencesCountBenchmark);
}
return checkCount;
}
}
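// Editorial note (illustrative sketch, not part of this commit): with the initial values used
// elsewhere in this file (checkCount = 10, referencesCountBenchmark = 1000), the controller
// behaves as follows:
//
//     var controller = new WeakReferenceCheckCountController(10, 1000);
//     controller.AdjustCheckCount(1600);  // 1600 > 1500, so checkCount doubles to 20 and the benchmark moves to 1500
//     controller.AdjustCheckCount(1400);  // 1400 <= 2250, so checkCount stays at 20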
internal class WeakObjectManagerImpl : IWeakObjectManager
{
private static readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(WeakObjectManagerImpl));
internal static TimeSpan DefaultCheckInterval = TimeSpan.FromSeconds(3);
private TimeSpan checkInterval;
private WeakReferenceCheckCountController checkCountController = new WeakReferenceCheckCountController(10, 1000);
/// <summary>
/// Sleep time for checking thread
/// </summary>
public TimeSpan CheckInterval
{
get
{
return checkInterval;
}
set
{
checkInterval = value;
}
}
/// <summary>
/// Maximum running duration for checking thread each time
/// </summary>
private static readonly TimeSpan MaxReleasingDuration = TimeSpan.FromMilliseconds(100);
private readonly ConcurrentQueue<WeakReferenceObjectIdPair> weakReferences = new ConcurrentQueue<WeakReferenceObjectIdPair>();
private bool shouldKeepRunning = true;
private IObjectReleaser objectReleaser = new JvmObjectReleaser();
internal IObjectReleaser ObjectReleaser
{
set { objectReleaser = value; }
}
private Thread releaserThread;
internal WeakObjectManagerImpl(TimeSpan checkIntervalTimeSpan)
{
checkInterval = checkIntervalTimeSpan;
releaserThread = new Thread(RunReleaseObjectLoop) { IsBackground = true };
releaserThread.Start();
}
internal WeakObjectManagerImpl() : this(DefaultCheckInterval) { }
public int GetReferencesCount()
{
return weakReferences.Count;
}
private void RunReleaseObjectLoop()
{
logger.LogInfo("Checking objects thread start ...");
while (shouldKeepRunning)
{
ReleseGarbageCollectedObjects();
Thread.Sleep(CheckInterval);
}
logger.LogDebug("Checking objects thread stopped.");
}
~WeakObjectManagerImpl()
{
Dispose();
}
public void AddWeakRefereceObject(JvmObjectReference obj)
{
if (obj == null || string.IsNullOrEmpty(obj.Id))
{
logger.LogWarn("Not add null weak object or id : {0}", obj);
return;
}
weakReferences.Enqueue(new WeakReferenceObjectIdPair(new WeakReference(obj), obj.ToString()));
}
private void ReleseGarbageCollectedObjects()
{
int referencesCount = weakReferences.Count;
if (referencesCount == 0)
{
logger.LogDebug("check begin : quit as weakReferences.Count = 0");
return;
}
var beginTime = DateTime.Now;
int checkCount = checkCountController.AdjustCheckCount(referencesCount);
logger.LogDebug("check begin : weakReferences.Count = {0}, checkCount: {1}", referencesCount, checkCount);
int garbageCount;
var aliveList = ReleseGarbageCollectedObjects(checkCount, out garbageCount);
var timeReleaseGarbage = DateTime.Now;
aliveList.ForEach(item => weakReferences.Enqueue(item));
var timeStoreAlive = DateTime.Now;
logger.LogInfo("check end : released {0} garbage, remain {1} alive, used {2} ms : release garbage used {3} ms, store alive used {4} ms",
garbageCount, weakReferences.Count, (DateTime.Now - beginTime).TotalMilliseconds,
(timeReleaseGarbage - beginTime).TotalMilliseconds,
(timeStoreAlive - timeReleaseGarbage).TotalMilliseconds
);
}
private List<WeakReferenceObjectIdPair> ReleseGarbageCollectedObjects(int checkCount, out int garbageCount)
{
var aliveList = new List<WeakReferenceObjectIdPair>();
garbageCount = 0;
int i = 0;
WeakReferenceObjectIdPair weakReferenceObjectIdPair;
while (weakReferences.TryDequeue(out weakReferenceObjectIdPair))
{
var weakRef = weakReferenceObjectIdPair.Key;
if (weakRef.IsAlive)
{
aliveList.Add(weakReferenceObjectIdPair);
}
else
{
objectReleaser.ReleaseObject(weakReferenceObjectIdPair.Value);
garbageCount++;
}
i++;
if (i >= checkCount)
{
logger.LogDebug("Stop releasing as exceeded allowed checkCount: {0}", checkCount);
break;
}
}
return aliveList;
}
/// <summary>
/// It can be an expensive operation. ** Do not use ** unless there is a real need for this method
/// </summary>
/// <returns></returns>
public int GetAliveCount()
{
//copying to get alive count at the time of this method call
var copiedList = new Queue<WeakReferenceObjectIdPair>(weakReferences);
var count = 0;
foreach (var weakReference in copiedList)
{
if (weakReference.Key.IsAlive)
{
count++;
}
}
return count;
}
public virtual void Dispose()
{
logger.LogInfo("Dispose {0}", this.GetType());
shouldKeepRunning = false;
}
}
internal interface IObjectReleaser
{
void ReleaseObject(string objId);
}
internal class JvmObjectReleaser : IObjectReleaser
{
private const string ReleaseHandler = "SparkCLRHandler";
private const string ReleaseMethod = "rm";
public void ReleaseObject(string objId)
{
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(ReleaseHandler, ReleaseMethod, objId);
}
}
}

Просмотреть файл

@ -8,6 +8,7 @@ using Microsoft.Spark.CSharp.Interop.Ipc;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Proxy.Ipc;
[assembly: InternalsVisibleTo("Tests.Common")]
[assembly: InternalsVisibleTo("AdapterTest")]
[assembly: InternalsVisibleTo("WorkerTest")]
// DynamicProxyGenAssembly2 is a temporary assembly built by mocking systems that use CastleProxy like Moq
@ -39,5 +40,12 @@ namespace Microsoft.Spark.CSharp.Interop
configurationService = value;
}
}
private static IWeakObjectManager weakObjectManager;
internal static IWeakObjectManager WeakObjectManager
{
get { return weakObjectManager ?? (weakObjectManager = new WeakObjectManagerImpl()); }
set { weakObjectManager = value; }
}
}
}

Просмотреть файл

@ -0,0 +1,130 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.IO;
using System.Net;
using System.Net.Sockets;
namespace Microsoft.Spark.CSharp.Network
{
/// <summary>
/// A simple wrapper of System.Net.Sockets.Socket class.
/// </summary>
public class DefaultSocketWrapper : ISocketWrapper
{
private readonly Socket innerSocket;
/// <summary>
/// Default constructor that creates a new instance of the DefaultSocketWrapper class, which represents
/// a traditional socket (System.Net.Sockets.Socket).
///
/// This socket is bound to Loopback with port 0.
/// </summary>
public DefaultSocketWrapper()
{
innerSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
var localEndPoint = new IPEndPoint(IPAddress.Loopback, 0);
innerSocket.Bind(localEndPoint);
}
/// <summary>
/// Initializes an instance of the DefaultSocketWrapper class using the specified System.Net.Sockets.Socket object.
/// </summary>
/// <param name="socket">The existing socket</param>
private DefaultSocketWrapper(Socket socket)
{
innerSocket = socket;
}
/// <summary>
/// Accepts an incoming connection request.
/// </summary>
/// <returns>A DefaultSocketWrapper instance used to send and receive data</returns>
public ISocketWrapper Accept()
{
var socket = innerSocket.Accept();
return new DefaultSocketWrapper(socket);
}
/// <summary>
/// Closes the socket connection and releases all associated resources.
/// </summary>
public void Close()
{
innerSocket.Close();
}
/// <summary>
/// Establishes a connection to a remote host that is specified by an IP address and a port number
/// </summary>
/// <param name="remoteaddr">The IP address of the remote host</param>
/// <param name="port">The port number of the remote host</param>
public void Connect(IPAddress remoteaddr, int port)
{
var remoteEndPoint = new IPEndPoint(remoteaddr, port);
innerSocket.Connect(remoteEndPoint);
}
/// <summary>
/// Returns the NetworkStream used to send and receive data.
/// </summary>
/// <returns>The underlying Stream instance that can be used to send and receive data</returns>
/// <remarks>
/// GetStream returns a NetworkStream that you can use to send and receive data. You must close/dispose
/// the NetworkStream yourself; disposing the DefaultSocketWrapper does not release the NetworkStream.
/// </remarks>
public Stream GetStream()
{
return new NetworkStream(innerSocket);
}
/// <summary>
/// Starts listening for incoming connection requests
/// </summary>
/// <param name="backlog">The maximum length of the pending connections queue. </param>
public void Listen(int backlog = (int)SocketOptionName.MaxConnections)
{
innerSocket.Listen(backlog);
}
/// <summary>
/// Disposes the resources used by this instance of the DefaultSocketWrapper class.
/// </summary>
/// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources</param>
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
innerSocket.Dispose();
}
}
/// <summary>
/// Releases all resources used by the current instance of the DefaultSocketWrapper class.
/// </summary>
public void Dispose()
{
Dispose(true);
}
/// <summary>
/// Frees resources used by the DefaultSocketWrapper class
/// </summary>
~DefaultSocketWrapper()
{
Dispose(false);
}
/// <summary>
/// Returns the local endpoint.
/// </summary>
public EndPoint LocalEndPoint
{
get
{
return innerSocket.LocalEndPoint;
}
}
}
}
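// Editorial note (illustrative sketch, not part of this commit): typical client-side use of the
// wrapper; the port number below is hypothetical. As noted in the GetStream remarks, the
// NetworkStream must be disposed by the caller.
//
//     using (ISocketWrapper socket = new DefaultSocketWrapper())
//     {
//         socket.Connect(System.Net.IPAddress.Loopback, 8080);
//         using (var stream = socket.GetStream())
//         {
//             stream.Write(new byte[] { 1, 2, 3 }, 0, 3);
//         }
//     }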

Просмотреть файл

@ -0,0 +1,52 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.IO;
using System.Net;
using System.Net.Sockets;
namespace Microsoft.Spark.CSharp.Network
{
/// <summary>
/// ISocketWrapper interface defines the common methods to operate a socket (traditional socket or
/// Windows Registered IO socket)
/// </summary>
public interface ISocketWrapper : IDisposable
{
/// <summary>
/// Accepts an incoming connection request.
/// </summary>
/// <returns>An ISocketWrapper instance used to send and receive data</returns>
ISocketWrapper Accept();
/// <summary>
/// Closes the socket connection and releases all associated resources.
/// </summary>
void Close();
/// <summary>
/// Establishes a connection to a remote host that is specified by an IP address and a port number
/// </summary>
/// <param name="remoteaddr">The IP address of the remote host</param>
/// <param name="port">The port number of the remote host</param>
void Connect(IPAddress remoteaddr, int port);
/// <summary>
/// Returns a stream used to send and receive data.
/// </summary>
/// <returns>The underlying Stream instance that can be used to send and receive data</returns>
Stream GetStream();
/// <summary>
/// Starts listening for incoming connection requests
/// </summary>
/// <param name="backlog">The maximum length of the pending connections queue. </param>
void Listen(int backlog = (int)SocketOptionName.MaxConnections);
/// <summary>
/// Returns the local endpoint.
/// </summary>
EndPoint LocalEndPoint { get; }
}
}

Просмотреть файл

@ -0,0 +1,27 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
namespace Microsoft.Spark.CSharp.Network
{
/// <summary>
/// SocketFactory is used to create an ISocketWrapper instance based on the configuration and OS version.
///
/// The returned instance can be a RioSocket object if the configuration selects RioSocket and the
/// application is running on a Windows OS that supports Registered IO sockets.
/// </summary>
public static class SocketFactory
{
/// <summary>
/// Creates an ISocketWrapper instance based on the configuration and OS version.
/// </summary>
/// <returns>
/// A RioSocket instance if the configuration selects RioSocket and the application is running
/// on a Windows OS that supports Registered IO sockets. By default, it returns a
/// DefaultSocketWrapper instance, which wraps System.Net.Sockets.Socket.
/// </returns>
public static ISocketWrapper CreateSocket()
{
return new DefaultSocketWrapper();
}
}
}
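// Editorial note (illustrative sketch, not part of this commit): callers obtain sockets through the
// factory rather than instantiating DefaultSocketWrapper directly, so a RIO-based implementation
// can be substituted later without changing call sites:
//
//     ISocketWrapper listener = SocketFactory.CreateSocket();
//     listener.Listen();
//     // Accept() then returns a new ISocketWrapper per incoming connection.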

Просмотреть файл

@ -30,5 +30,5 @@ using System.Runtime.InteropServices;
// Build Number
// Revision
//
[assembly: AssemblyVersion("1.6.0.0")]
[assembly: AssemblyFileVersion("1.6.0.0")]
[assembly: AssemblyVersion("1.6.1.0")]
[assembly: AssemblyFileVersion("1.6.1.0")]

Просмотреть файл

@ -19,7 +19,7 @@ namespace Microsoft.Spark.CSharp.Proxy
void CallForeachRDD(byte[] func, string serializedMode);
void Print(int num = 10);
void Persist(StorageLevelType storageLevelType);
void Checkpoint(long intervalMs);
void Checkpoint(int intervalSeconds);
IRDDProxy[] Slice(long fromUnixTime, long toUnixTime);
}
}

Просмотреть файл

@ -42,6 +42,7 @@ namespace Microsoft.Spark.CSharp.Proxy
IDataFrameProxy Replace<T>(object subset, Dictionary<T, T> toReplaceAndValueDict);
IEnumerable<IDataFrameProxy> RandomSplit(IEnumerable<double> weights, long? seed);
IDataFrameProxy Sort(IColumnProxy[] columns);
IDataFrameProxy SortWithinPartitions(IColumnProxy[] columns);
IDataFrameProxy Alias(string alias);
double Corr(string column1, string column2, string method);
double Cov(string column1, string column2);
@ -55,6 +56,8 @@ namespace Microsoft.Spark.CSharp.Proxy
void Persist(StorageLevelType storageLevelType);
void Unpersist(bool blocking = true);
IDataFrameProxy Repartition(int numPartitions);
IDataFrameProxy Repartition(int numPartitions, IColumnProxy[] columns);
IDataFrameProxy Repartition(IColumnProxy[] columns);
IDataFrameProxy Sample(bool withReplacement, double fraction, long seed);
IDataFrameWriterProxy Write();
}

Просмотреть файл

@ -3,7 +3,6 @@
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

Просмотреть файл

@ -42,6 +42,5 @@ namespace Microsoft.Spark.CSharp.Proxy
void SaveAsTextFile(string path, string compressionCodecClass);
long Count();
int CollectAndServe();
int PartitionLength();
}
}

Просмотреть файл

@ -16,7 +16,7 @@ namespace Microsoft.Spark.CSharp.Proxy
// or restore it from checkpoint. Thus this function is called before IStreamingContextProxy is initialized. So CheckpointExists()
// should not be put to IStreamingContextProxy.
bool CheckpointExists(string checkpointPath);
IStreamingContextProxy CreateStreamingContext(SparkContext sparkContext, long durationMs);
IStreamingContextProxy CreateStreamingContext(SparkContext sparkContext, int durationSeconds);
IStreamingContextProxy CreateStreamingContext(string checkpointPath);
}
}

Просмотреть файл

@ -15,6 +15,7 @@ namespace Microsoft.Spark.CSharp.Proxy
internal interface ISparkContextProxy
{
ISqlContextProxy CreateSqlContext();
ISqlContextProxy CreateHiveContext();
IColumnProxy CreateColumnFromName(string name);
IColumnProxy CreateFunction(string name, object self);
IColumnProxy CreateBinaryMathFunction(string name, object self, object other);
@ -50,7 +51,7 @@ namespace Microsoft.Spark.CSharp.Proxy
int RunJob(IRDDProxy rdd, IEnumerable<int> partitions);
IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId);
IRDDProxy CreateCSharpRdd(IRDDProxy prefvJavaRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservePartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator);
IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions);
IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions, long partitionFuncId);
IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType);
}
internal interface IBroadcastProxy

Просмотреть файл

@ -14,12 +14,31 @@ namespace Microsoft.Spark.CSharp.Proxy
internal interface ISqlContextProxy
{
IDataFrameReaderProxy Read();
ISqlContextProxy NewSession();
string GetConf(string key, string defaultValue);
void SetConf(string key, string value);
IDataFrameProxy CreateDataFrame(IRDDProxy rddProxy, IStructTypeProxy structTypeProxy);
void RegisterDataFrameAsTable(IDataFrameProxy dataFrameProxy, string tableName);
void DropTempTable(string tableName);
IDataFrameProxy Table(string tableName);
IDataFrameProxy Tables();
IDataFrameProxy Tables(string databaseName);
IEnumerable<string> TableNames();
IEnumerable<string> TableNames(string databaseName);
void CacheTable(string tableName);
void UncacheTable(string tableName);
void ClearCache();
bool IsCached(string tableName);
IDataFrameProxy ReadDataFrame(string path, StructType schema, Dictionary<string, string> options);
IDataFrameProxy JsonFile(string path);
IDataFrameProxy TextFile(string path, StructType schema, string delimiter);
IDataFrameProxy TextFile(string path, string delimiter, bool hasHeader, bool inferSchema);
IDataFrameProxy Sql(string query);
void RegisterFunction(string name, byte[] command, string returnType);
#region HiveContext
void RefreshTable(string tableName);
#endregion
}
}

Просмотреть файл

@ -16,18 +16,23 @@ namespace Microsoft.Spark.CSharp.Proxy
SparkContext SparkContext { get; }
void Start();
void Stop();
void Remember(long durationMs);
void Remember(int durationSeconds);
void Checkpoint(string directory);
IDStreamProxy TextFileStream(string directory);
IDStreamProxy SocketTextStream(string hostname, int port, StorageLevelType storageLevelType);
IDStreamProxy KafkaStream(Dictionary<string, int> topics, Dictionary<string, string> kafkaParams, StorageLevelType storageLevelType);
IDStreamProxy DirectKafkaStream(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets);
IDStreamProxy DirectKafkaStreamWithRepartition(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets,
int numPartitions, byte[] readFunc, string serializationMode);
IDStreamProxy Union(IDStreamProxy firstDStreams, IDStreamProxy[] otherDStreams);
void AwaitTermination();
void AwaitTermination(int timeout);
void AwaitTerminationOrTimeout(long timeout);
IDStreamProxy CreateCSharpDStream(IDStreamProxy jdstream, byte[] func, string serializationMode);
IDStreamProxy CreateCSharpTransformed2DStream(IDStreamProxy jdstream, IDStreamProxy jother, byte[] func, string serializationMode, string serializationModeOther);
IDStreamProxy CreateCSharpReducedWindowedDStream(IDStreamProxy jdstream, byte[] func, byte[] invFunc, int windowSeconds, int slideSeconds, string serializationMode);
IDStreamProxy CreateCSharpStateDStream(IDStreamProxy jdstream, byte[] func, string className, string serializationMode, string serializationMode2);
IDStreamProxy CreateConstantInputDStream(IRDDProxy rddProxy);
IDStreamProxy EventHubsUnionStream(Dictionary<string, string> eventHubsParams, StorageLevelType storageLevelType);
}
}

Просмотреть файл

@ -40,7 +40,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IDStreamProxy Window(int windowSeconds, int slideSeconds = 0)
{
string windowId = null;
var windowDurationReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { windowSeconds * 1000 });
var windowDurationReference = JvmBridgeUtils.GetJavaDuration(windowSeconds);
if (slideSeconds <= 0)
{
@ -48,7 +48,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
return new DStreamIpcProxy(new JvmObjectReference(windowId));
}
var slideDurationReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { slideSeconds * 1000 });
var slideDurationReference = JvmBridgeUtils.GetJavaDuration(slideSeconds);
windowId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(javaDStreamReference, "window", new object[] { windowDurationReference, slideDurationReference });
return new DStreamIpcProxy(new JvmObjectReference(windowId));
@ -77,9 +77,9 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDStreamReference, "persist", new object[] { jstorageLevel });
}
public void Checkpoint(long intervalMs)
public void Checkpoint(int intervalSeconds)
{
var jinterval = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { intervalMs });
var jinterval = JvmBridgeUtils.GetJavaDuration(intervalSeconds);
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDStreamReference, "checkpoint", new object[] { jinterval });
}
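// Editorial note (illustrative sketch, not part of this commit): JvmBridgeUtils.GetJavaDuration
// replaces the direct Duration constructor calls seen in the old lines above; it is assumed to be
// roughly equivalent to:
//
//     public static JvmObjectReference GetJavaDuration(int durationSeconds)
//     {
//         return SparkCLRIpcProxy.JvmBridge.CallConstructor(
//             "org.apache.spark.streaming.Duration", new object[] { durationSeconds * 1000 });
//     }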

Просмотреть файл

@ -14,6 +14,12 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
internal class DataFrameIpcProxy : IDataFrameProxy
{
private readonly JvmObjectReference jvmDataFrameReference;
internal JvmObjectReference JvmDataFrameReference
{
get { return jvmDataFrameReference; }
}
private readonly ISqlContextProxy sqlContextProxy;
private readonly DataFrameNaFunctions na;
@ -405,6 +411,20 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
jvmDataFrameReference, "sort", columnsSeq).ToString()), sqlContextProxy);
}
/// <summary>
/// Call https://github.com/apache/spark/blob/branch-1.6/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala, sortWithinPartitions(sortExprs: Column*): DataFrame
/// </summary>
/// <param name="columns"></param>
/// <returns></returns>
public IDataFrameProxy SortWithinPartitions(IColumnProxy[] columns)
{
var columnsSeq = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils",
"toSeq", new object[] { columns.Select(c => (c as ColumnIpcProxy).ScalaColumnReference).ToArray() }));
return new DataFrameIpcProxy(new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
jvmDataFrameReference, "sortWithinPartitions", columnsSeq).ToString()), sqlContextProxy);
}
/// <summary>
/// Call https://github.com/apache/spark/blob/branch-1.4/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala, as(alias: String): DataFrame
/// </summary>
@ -517,6 +537,35 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
new object[] { numPartitions }).ToString()), sqlContextProxy);
}
/// <summary>
/// Call https://github.com/apache/spark/blob/branch-1.6/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala, repartition(numPartitions: Int, partitionExprs: Column*): DataFrame
/// </summary>
/// <param name="numPartitions"></param>
/// <param name="columns"></param>
/// <returns></returns>
public IDataFrameProxy Repartition(int numPartitions, IColumnProxy[] columns)
{
var columnsSeq = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils",
"toSeq", new object[] { columns.Select(c => (c as ColumnIpcProxy).ScalaColumnReference).ToArray() }));
return new DataFrameIpcProxy(new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
jvmDataFrameReference, "repartition", new object[] { numPartitions, columnsSeq }).ToString()), sqlContextProxy);
}
/// <summary>
/// Call https://github.com/apache/spark/blob/branch-1.6/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala, repartition(partitionExprs: Column*): DataFrame
/// </summary>
/// <param name="columns"></param>
/// <returns></returns>
public IDataFrameProxy Repartition(IColumnProxy[] columns)
{
var columnsSeq = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils",
"toSeq", new object[] { columns.Select(c => (c as ColumnIpcProxy).ScalaColumnReference).ToArray() }));
return new DataFrameIpcProxy(new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
jvmDataFrameReference, "repartition", new object[] { columnsSeq }).ToString()), sqlContextProxy);
}
public IDataFrameProxy Sample(bool withReplacement, double fraction, long seed)
{
return

Просмотреть файл

@ -16,6 +16,7 @@ using Microsoft.Spark.CSharp.Interop.Ipc;
namespace Microsoft.Spark.CSharp.Proxy.Ipc
{
[ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not required
[Serializable]
internal class RDDIpcProxy : IRDDProxy
{
private readonly JvmObjectReference jvmRddReference;
@ -78,13 +79,6 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
return new RDDIpcProxy(jref);
}
public int PartitionLength()
{
var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "rdd"));
var partitions = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(rdd, "partitions", new object[] { });
return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("java.lang.reflect.Array", "getLength", new object[] { partitions }).ToString());
}
public IRDDProxy Coalesce(int numPartitions, bool shuffle)
{
return new RDDIpcProxy(new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "coalesce", new object[] { numPartitions, shuffle })));
@ -166,7 +160,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IRDDProxy SampleByKey(bool withReplacement, Dictionary<string, double> fractions, long seed)
{
var jfractions = SparkContextIpcProxy.GetJavaMap(fractions) as JvmObjectReference;
var jfractions = JvmBridgeUtils.GetJavaMap(fractions) as JvmObjectReference;
return new RDDIpcProxy(new JvmObjectReference((string) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "sampleByKey", new object[] { withReplacement, jfractions, seed })));
}
@ -184,25 +178,25 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public void SaveAsNewAPIHadoopDataset(IEnumerable<KeyValuePair<string, string>> conf)
{
var jconf = SparkContextIpcProxy.GetJavaMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaMap<string, string>(conf);
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopDataset", new object[] { jvmRddReference, false, jconf, null, null, true });
}
public void SaveAsNewAPIHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable<KeyValuePair<string, string>> conf)
{
var jconf = SparkContextIpcProxy.GetJavaMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaMap<string, string>(conf);
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsNewAPIHadoopFile", new object[] { jvmRddReference, false, path, outputFormatClass, keyClass, valueClass, null, null, jconf });
}
public void SaveAsHadoopDataset(IEnumerable<KeyValuePair<string, string>> conf)
{
var jconf = SparkContextIpcProxy.GetJavaMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaMap<string, string>(conf);
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopDataset", new object[] { jvmRddReference, false, jconf, null, null, false });
}
public void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable<KeyValuePair<string, string>> conf, string compressionCodecClass)
{
var jconf = SparkContextIpcProxy.GetJavaMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaMap<string, string>(conf);
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopFile", new object[] { jvmRddReference, false, path, outputFormatClass, keyClass, valueClass, null, null, jconf, compressionCodecClass });
}
@ -211,17 +205,18 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "SaveAsSequenceFile", new object[] { jvmRddReference, false, path, compressionCodecClass });
}
//this method is called by RDD<string> (implementation is at StringRDDFunctions.SaveAsTextFile)
//calling saveAsTextFile() on CSharpRDDs results in raw bytes being written to the text file - so saveStringRddAsTextFile() is called instead, which converts bytes to strings before writing to the file
public void SaveAsTextFile(string path, string compressionCodecClass)
{
var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "rdd"));
if (!string.IsNullOrEmpty(compressionCodecClass))
{
var codec = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("java.lang.Class", "forName", new object[] { compressionCodecClass }));
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "saveAsTextFile", new object[] { path, codec });
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.csharp.CSharpRDD", "saveStringRddAsTextFile", new object[] { jvmRddReference, path, codec });
}
else
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "saveAsTextFile", new object[] { path });
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.csharp.CSharpRDD", "saveStringRddAsTextFile", new object[] { jvmRddReference, path });
}
}
public StorageLevel GetStorageLevel()

Просмотреть файл

@ -80,9 +80,9 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(fs, "listStatus", path) != null;
}
public IStreamingContextProxy CreateStreamingContext(SparkContext sparkContext, long durationMs)
public IStreamingContextProxy CreateStreamingContext(SparkContext sparkContext, int durationSeconds)
{
streamingContextIpcProxy = new StreamingContextIpcProxy(sparkContext, durationMs);
streamingContextIpcProxy = new StreamingContextIpcProxy(sparkContext, durationSeconds);
return streamingContextIpcProxy;
}

Просмотреть файл

@ -39,6 +39,13 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
return new SqlContextIpcProxy(new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "createSQLContext", new object[] { jvmSparkContextReference })));
}
public ISqlContextProxy CreateHiveContext()
{
return new SqlContextIpcProxy(new JvmObjectReference(
(string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
"org.apache.spark.sql.api.csharp.SQLUtils", "createHiveContext", new object[] { jvmSparkContextReference })));
}
public void CreateSparkContext(string master, string appName, string sparkHome, ISparkConfProxy conf)
{
object[] args = (new object[] { master, appName, sparkHome, (conf == null ? null : (conf as SparkConfIpcProxy).JvmSparkConfReference) }).Where(x => x != null).ToArray();
@ -152,7 +159,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable<KeyValuePair<string, string>> conf, int batchSize)
{
var jconf = GetJavaHashMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaHashMap<string, string>(conf);
var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "newAPIHadoopFile",
new object[] { jvmJavaContextReference, filePath, inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, jconf, batchSize }));
return new RDDIpcProxy(jvmRddReference);
@ -160,7 +167,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable<KeyValuePair<string, string>> conf, int batchSize)
{
var jconf = GetJavaHashMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaHashMap<string, string>(conf);
var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "newAPIHadoopRDD",
new object[] { jvmJavaContextReference, inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, jconf, batchSize }));
return new RDDIpcProxy(jvmRddReference);
@ -168,7 +175,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IRDDProxy HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable<KeyValuePair<string, string>> conf, int batchSize)
{
var jconf = GetJavaHashMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaHashMap<string, string>(conf);
var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "hadoopFile",
new object[] { jvmJavaContextReference, filePath, inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, jconf, batchSize }));
return new RDDIpcProxy(jvmRddReference);
@ -176,7 +183,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable<KeyValuePair<string, string>> conf, int batchSize)
{
var jconf = GetJavaHashMap<string, string>(conf);
var jconf = JvmBridgeUtils.GetJavaHashMap<string, string>(conf);
var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "hadoopRDD",
new object[] { jvmJavaContextReference, inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, jconf, batchSize }));
return new RDDIpcProxy(jvmRddReference);
@ -191,7 +198,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IRDDProxy Union(IEnumerable<IRDDProxy> rdds)
{
var jfirst = (rdds.First() as RDDIpcProxy).JvmRddReference;
var jrest = GetJavaList<JvmObjectReference>(rdds.Skip(1).Select(r => (r as RDDIpcProxy).JvmRddReference));
var jrest = JvmBridgeUtils.GetJavaList<JvmObjectReference>(rdds.Skip(1).Select(r => (r as RDDIpcProxy).JvmRddReference));
var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "union", new object[] { jfirst, jrest }));
return new RDDIpcProxy(jvmRddReference);
}
@ -250,13 +257,20 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
}
}
public IRDDProxy CreatePairwiseRDD(IRDDProxy jvmReferenceOfByteArrayRdd, int numPartitions)
/// <summary>
/// Creates a PairwiseRDD.
/// </summary>
/// <param name="jvmReferenceOfByteArrayRdd"></param>
/// <param name="numPartitions"></param>
/// <param name="partitionFuncId">Globally unique id of the partitioner, used for comparing PythonPartitioners in the JVM.</param>
/// <returns></returns>
public IRDDProxy CreatePairwiseRDD(IRDDProxy jvmReferenceOfByteArrayRdd, int numPartitions, long partitionFuncId)
{
var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod((jvmReferenceOfByteArrayRdd as RDDIpcProxy).JvmRddReference, "rdd"));
var pairwiseRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PairwiseRDD", rdd);
var pairRddJvmReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pairwiseRdd, "asJavaPairRDD", new object[] { }).ToString());
var jpartitionerJavaReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonPartitioner", new object[] { numPartitions, (long)0 });
var jpartitionerJavaReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonPartitioner", new object[] { numPartitions, partitionFuncId });
var partitionedPairRddJvmReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pairRddJvmReference, "partitionBy", new object[] { jpartitionerJavaReference }).ToString());
var jvmRddReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "valueOfPair", new object[] { partitionedPairRddJvmReference }).ToString());
//var jvmRddReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(partitionedRddJvmReference, "rdd", new object[] { }).ToString());
@ -267,7 +281,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
{
var hashTableReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
var arrayListReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
var jbroadcastVariables = GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
var jbroadcastVariables = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod((prevJvmRddReference as RDDIpcProxy).JvmRddReference, "rdd"));
@ -288,7 +302,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
{
var jSqlContext = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.SQLContext", new object[] { jvmSparkContextReference });
var jDataType = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jSqlContext, "parseDataType", new object[] { "\"" + returnType + "\"" }));
var jbroadcastVariables = GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
var jbroadcastVariables = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
var hashTableReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
var arrayListReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
@ -306,7 +320,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
{
var jpartitions = GetJavaList<int>(partitions);
var jpartitions = JvmBridgeUtils.GetJavaList<int>(partitions);
return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions }).ToString());
}
@ -333,7 +347,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
if (self is ColumnIpcProxy)
self = (self as ColumnIpcProxy).ScalaColumnReference;
else if (self is IColumnProxy[])
self = GetJavaSeq<JvmObjectReference>((self as IColumnProxy[]).Select(x => (x as ColumnIpcProxy).ScalaColumnReference));
self = JvmBridgeUtils.GetJavaSeq<JvmObjectReference>((self as IColumnProxy[]).Select(x => (x as ColumnIpcProxy).ScalaColumnReference));
return new ColumnIpcProxy(new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.functions", name, self)));
}
@ -351,52 +365,6 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
return new ColumnIpcProxy(new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.functions", name)));
}
public static JvmObjectReference GetJavaMap<K, V>(IEnumerable<KeyValuePair<K, V>> enumerable)
{
var jmap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Key, item.Value });
}
return jmap;
}
public static JvmObjectReference GetJavaHashMap<K, V>(IEnumerable<KeyValuePair<K, V>> enumerable)
{
var jmap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.HashMap", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Key, item.Value });
}
return jmap;
}
public static JvmObjectReference GetJavaSet<T>(IEnumerable<T> enumerable)
{
var jset = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.HashSet", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jset, "add", new object[] { item });
}
return jset;
}
public static JvmObjectReference GetJavaList<T>(IEnumerable<T> enumerable)
{
var jlist = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
if (enumerable != null)
{
foreach (var item in enumerable)
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jlist, "add", new object[] { item });
}
return jlist;
}
public static JvmObjectReference GetJavaSeq<T>(IEnumerable<T> enumerable)
{
return new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "toSeq", GetJavaList<T>(enumerable)));
}
public static JvmObjectReference GetJavaStorageLevel(StorageLevelType storageLevelType)
{
return new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.java.StorageLevels", "create",

Просмотреть файл

@ -105,5 +105,100 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(judf, "registerPython", new object[] {name, udf});
}
public ISqlContextProxy NewSession()
{
return new SqlContextIpcProxy(
new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "newSession")));
}
public string GetConf(string key, string defaultValue)
{
return (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "getConf", new object[] { key, defaultValue });
}
public void SetConf(string key, string value)
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "setConf", new object[] { key, value });
}
public void RegisterDataFrameAsTable(IDataFrameProxy dataFrameProxy, string tableName)
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
jvmSqlContextReference, "registerDataFrameAsTable",
new object[] { (dataFrameProxy as DataFrameIpcProxy).JvmDataFrameReference, tableName });
}
public void DropTempTable(string tableName)
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
jvmSqlContextReference, "dropTempTable", new object[] { tableName });
}
public IDataFrameProxy Table(string tableName)
{
return new DataFrameIpcProxy(
new JvmObjectReference(
(string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "table",
new object[] { tableName })), this);
}
public IDataFrameProxy Tables()
{
return new DataFrameIpcProxy(
new JvmObjectReference(
(string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "tables")), this);
}
public IDataFrameProxy Tables(string databaseName)
{
return new DataFrameIpcProxy(
new JvmObjectReference(
(string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "tables",
new object[] { databaseName })), this);
}
public IEnumerable<string> TableNames()
{
var tableNames = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "tableNames");
return (List<string>) tableNames;
}
public IEnumerable<string> TableNames(string databaseName)
{
return (List<string>)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "tableNames",
new object[] { databaseName });
}
public void CacheTable(string tableName)
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "cacheTable",
new object[] { tableName });
}
public void UncacheTable(string tableName)
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "uncacheTable",
new object[] { tableName });
}
public void ClearCache()
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "clearCache");
}
public bool IsCached(string tableName)
{
return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "isCached",
new object[] { tableName });
}
#region HiveContext
public void RefreshTable(string tableName)
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "refreshTable",
new object[] { tableName });
}
#endregion
}
}

Просмотреть файл

@ -10,12 +10,12 @@ using System.Net;
using System.Net.Sockets;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop.Ipc;
using Microsoft.Spark.CSharp.Network;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Proxy.Ipc
@ -26,7 +26,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
[ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not required
internal class StreamingContextIpcProxy : IStreamingContextProxy
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkConf));
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(StreamingContextIpcProxy));
internal readonly JvmObjectReference jvmStreamingContextReference;
private readonly JvmObjectReference jvmJavaStreamingReference;
private readonly ISparkContextProxy sparkContextProxy;
@ -43,36 +43,51 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
}
}
public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs)
public StreamingContextIpcProxy(SparkContext sparkContext, int durationSeconds)
{
this.sparkContext = sparkContext;
sparkContextProxy = sparkContext.SparkContextProxy;
var jduration = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { durationMs });
var jduration = JvmBridgeUtils.GetJavaDuration(durationSeconds);
JvmObjectReference jvmSparkContextReference = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;
jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.StreamingContext", new object[] { jvmSparkContextReference, jduration });
jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { jvmStreamingContextReference });
StartAccumulatorServer(sparkContext);
StartCallbackServer();
}
public StreamingContextIpcProxy(string checkpointPath)
{
sparkContext = SparkContext.GetActiveSparkContext();
StartCallbackServer();
jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath });
jvmStreamingContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc"));
JvmObjectReference jvmSparkContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc"));
JvmObjectReference jvmSparkConfReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf"));
JvmObjectReference jvmJavaContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext"));
sparkContextProxy = new SparkContextIpcProxy(jvmSparkContextReference, jvmJavaContextReference);
var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConfReference);
sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy));
if (sparkContext == null)
{
JvmObjectReference jvmSparkContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc"));
JvmObjectReference jvmSparkConfReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf"));
JvmObjectReference jvmJavaContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext"));
sparkContextProxy = new SparkContextIpcProxy(jvmSparkContextReference, jvmJavaContextReference);
var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConfReference);
sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy));
}
else
{
sparkContextProxy = sparkContext.SparkContextProxy;
}
StartAccumulatorServer(sparkContext);
}
private void StartAccumulatorServer(SparkContext sparkContext)
{
// TODO: We don't know whether the accumulator variable was used before restart, so we just start the accumulator server for safety.
sparkContext.StartAccumulatorServer();
}
public void Start()
{
int port = StartCallback();
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("SparkCLRHandler", "connectCallback", port); //className and methodName hardcoded in CSharpBackendHandler
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "start");
}
@ -84,9 +99,9 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("SparkCLRHandler", "closeCallback");
}
public void Remember(long durationMs)
public void Remember(int durationSeconds)
{
var jduration = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { (int)durationMs });
var jduration = JvmBridgeUtils.GetJavaDuration(durationSeconds);
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "remember", new object[] { jduration });
}
@ -119,8 +134,8 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IDStreamProxy CreateCSharpReducedWindowedDStream(IDStreamProxy jdstream, byte[] func, byte[] invFunc, int windowSeconds, int slideSeconds, string serializationMode)
{
var windowDurationReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { windowSeconds * 1000 });
var slideDurationReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { slideSeconds * 1000 });
var windowDurationReference = JvmBridgeUtils.GetJavaDuration(windowSeconds);
var slideDurationReference = JvmBridgeUtils.GetJavaDuration(slideSeconds);
var jvmDStreamReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.csharp.CSharpReducedWindowedDStream",
new object[] { (jdstream as DStreamIpcProxy).jvmDStreamReference, func, invFunc, windowDurationReference, slideDurationReference, serializationMode });
@ -138,6 +153,21 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
return new DStreamIpcProxy(javaDStreamReference, jvmDStreamReference);
}
public IDStreamProxy CreateConstantInputDStream(IRDDProxy rddProxy)
{
var rddReference =
new JvmObjectReference(
(string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(((RDDIpcProxy)rddProxy).JvmRddReference, "rdd"));
var jvmDStreamReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
"org.apache.spark.streaming.api.csharp.CSharpConstantInputDStream", jvmStreamingContextReference, rddReference);
var javaDStreamReference =
new JvmObjectReference((String)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDStreamReference, "asJavaDStream"));
return new DStreamIpcProxy(javaDStreamReference, jvmDStreamReference);
}
public IDStreamProxy TextFileStream(string directory)
{
var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "textFileStream", new object[] { directory }).ToString());
@ -153,19 +183,19 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
public IDStreamProxy KafkaStream(Dictionary<string, int> topics, Dictionary<string, string> kafkaParams, StorageLevelType storageLevelType)
{
JvmObjectReference jtopics = SparkContextIpcProxy.GetJavaMap<string, int>(topics);
JvmObjectReference jkafkaParams = SparkContextIpcProxy.GetJavaMap<string, string>(kafkaParams);
JvmObjectReference jtopics = JvmBridgeUtils.GetJavaMap<string, int>(topics);
JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);
JvmObjectReference jlevel = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);
// KafkaUtilsPythonHelper: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });
var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createStream", new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jlevel }).ToString());
return new DStreamIpcProxy(jstream);
}
public IDStreamProxy DirectKafkaStream(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets)
{
JvmObjectReference jtopics = SparkContextIpcProxy.GetJavaSet<string>(topics);
JvmObjectReference jkafkaParams = SparkContextIpcProxy.GetJavaMap<string, string>(kafkaParams);
JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet<string>(topics);
JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);
var jTopicAndPartitions = fromOffsets.Select(x =>
new KeyValuePair<JvmObjectReference, long>
@ -175,13 +205,48 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
)
);
JvmObjectReference jfromOffsets = SparkContextIpcProxy.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
// KafkaUtilsPythonHelper: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });
var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createDirectStreamWithoutMessageHandler", new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets }).ToString());
return new DStreamIpcProxy(jstream);
}
public IDStreamProxy DirectKafkaStreamWithRepartition(List<string> topics, Dictionary<string, string> kafkaParams,
Dictionary<string, long> fromOffsets, int numPartitions, byte[] readFunc, string serializationMode)
{
JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet<string>(topics);
JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);
var jTopicAndPartitions = fromOffsets.Select(x =>
new KeyValuePair<JvmObjectReference, long>
(
SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Key.Split(':')[0], int.Parse(x.Key.Split(':')[1]) }),
x.Value
)
);
JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
// SparkCLR\scala\src\main\org\apache\spark\streaming\api\kafka\KafkaUtilsCSharpHelper.scala
JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsCSharpHelper", new object[] { });
var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createDirectStreamWithoutMessageHandler",
new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets, (int)numPartitions, readFunc, serializationMode }).ToString());
return new DStreamIpcProxy(jstream);
}
public IDStreamProxy EventHubsUnionStream(Dictionary<string, string> eventHubsParams, StorageLevelType storageLevelType)
{
JvmObjectReference eventHubsParamsReference = JvmBridgeUtils.GetScalaMutableMap<string, string>(eventHubsParams);
JvmObjectReference storageLevelTypeReference = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);
return
new DStreamIpcProxy(
new JvmObjectReference(
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
"org.apache.spark.streaming.api.csharp.EventHubsUtils", "createUnionStream",
new object[] { jvmJavaStreamingReference, eventHubsParamsReference, storageLevelTypeReference })
.ToString()));
}
public IDStreamProxy Union(IDStreamProxy firstDStream, IDStreamProxy[] otherDStreams)
{
return new DStreamIpcProxy(
@ -190,7 +255,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
new object[]
{
(firstDStream as DStreamIpcProxy).javaDStreamReference,
SparkContextIpcProxy.GetJavaList<JvmObjectReference>(otherDStreams.Select(x => (x as DStreamIpcProxy).javaDStreamReference))
JvmBridgeUtils.GetJavaList<JvmObjectReference>(otherDStreams.Select(x => (x as DStreamIpcProxy).javaDStreamReference))
}
)));
}
@ -200,19 +265,19 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "awaitTermination");
}
public void AwaitTermination(int timeout)
public void AwaitTerminationOrTimeout(long timeout)
{
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "awaitTermination", new object[] { timeout });
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "awaitTerminationOrTimeout", new object[] { timeout });
}
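// Illustrative usage sketch (not part of this commit): at the public StreamingContext API level,
// which wraps this proxy, awaiting with a timeout would look roughly like the lines below. The
// assumption is that StreamingContext exposes an AwaitTerminationOrTimeout(long) wrapper that
// forwards to this proxy method; the 30000 ms value is arbitrary.
//   sparkStreamingContext.Start();
//   sparkStreamingContext.AwaitTerminationOrTimeout(30000);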
private void ProcessCallbackRequest(object socket)
{
logger.LogDebug("new thread created to process callback request");
logger.LogDebug("New thread (id={0}) created to process callback request", Thread.CurrentThread.ManagedThreadId);
try
{
using (Socket sock = (Socket)socket)
using (var s = new NetworkStream(sock))
using (var sock = (ISocketWrapper)socket)
using (var s = sock.GetStream())
{
while (true)
{
@ -268,6 +333,7 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
//log exception only when callback socket is not shutdown explicitly
if (!callbackSocketShutdown)
{
logger.LogError("Exception processing call back request. Thread id {0}", Thread.CurrentThread.ManagedThreadId);
logger.LogException(e);
// exit when exception happens
@ -281,16 +347,17 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
}
catch (Exception e)
{
logger.LogError("Exception in callback. Thread id {0}", Thread.CurrentThread.ManagedThreadId);
logger.LogException(e);
}
logger.LogDebug("thread to process callback request exit");
logger.LogDebug("Thread (id={0}) to process callback request exiting", Thread.CurrentThread.ManagedThreadId);
}
public int StartCallback()
private int StartCallbackServer()
{
TcpListener callbackServer = new TcpListener(IPAddress.Loopback, 0);
callbackServer.Start();
var callbackServer = SocketFactory.CreateSocket();
callbackServer.Listen();
Task.Run(() =>
{
@ -299,23 +366,28 @@ namespace Microsoft.Spark.CSharp.Proxy.Ipc
ThreadPool.SetMaxThreads(10, 10);
while (!callbackSocketShutdown)
{
Socket sock = callbackServer.AcceptSocket();
ThreadPool.QueueUserWorkItem(new WaitCallback(ProcessCallbackRequest), sock);
var sock = callbackServer.Accept();
ThreadPool.QueueUserWorkItem(ProcessCallbackRequest, sock);
}
}
catch (Exception e)
{
logger.LogError("Exception starting callback server");
logger.LogException(e);
throw;
}
finally
{
if (callbackServer != null)
callbackServer.Stop();
callbackServer.Close();
}
});
return (callbackServer.LocalEndpoint as IPEndPoint).Port;
int port = (callbackServer.LocalEndPoint as IPEndPoint).Port;
logger.LogInfo("Callback server port number is {0}", port);
SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("SparkCLRHandler", "connectCallback", port); //className and methodName hard coded in CSharpBackendHandler
return port;
}
}
}

Просмотреть файл

@ -13,6 +13,11 @@ namespace Microsoft.Spark.CSharp.Services
public class DefaultLoggerService : ILoggerService
{
internal readonly static DefaultLoggerService Instance = new DefaultLoggerService(typeof (Type));
/// <summary>
/// Get an instance of ILoggerService by a given type of logger
/// </summary>
/// <param name="type">The type of a logger to return</param>
/// <returns>An instance of ILoggerService</returns>
public ILoggerService GetLoggerInstance(Type type)
{
return new DefaultLoggerService(type);
@ -24,31 +29,105 @@ namespace Microsoft.Spark.CSharp.Services
type = t;
}
/// <summary>
/// Logs a message at debug level.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogDebug(string message)
{
Log("Debug", message);
}
/// <summary>
/// Logs a message at debug level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogDebug(string messageFormat, params object[] messageParameters)
{
Log("Debug", string.Format(messageFormat, messageParameters));
}
/// <summary>
/// Logs a message at info level.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogInfo(string message)
{
Log("Info", message);
}
/// <summary>
/// Logs a message at info level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogInfo(string messageFormat, params object[] messageParameters)
{
Log("Info", string.Format(messageFormat, messageParameters));
}
/// <summary>
/// Logs a message at warning level.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogWarn(string message)
{
Log("Warn", message);
}
/// <summary>
/// Logs a message at warning level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogWarn(string messageFormat, params object[] messageParameters)
{
Log("Warn", string.Format(messageFormat, messageParameters));
}
/// <summary>
/// Logs a fatal message.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogFatal(string message)
{
Log("Fatal", message);
}
/// <summary>
/// Logs a fatal message with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogFatal(string messageFormat, params object[] messageParameters)
{
Log("Fatal", string.Format(messageFormat, messageParameters));
}
/// <summary>
/// Logs an error message.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogError(string message)
{
Log("Error", message);
}
/// <summary>
/// Logs an error message with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogError(string messageFormat, params object[] messageParameters)
{
Log("Error", string.Format(messageFormat, messageParameters));
}
/// <summary>
/// Logs an exception
/// </summary>
/// <param name="e">The exception to be logged</param>
public void LogException(Exception e)
{
Log("Exception", string.Format("{0}{1}{2}", e.Message, Environment.NewLine, e.StackTrace));

Просмотреть файл

@ -1,19 +1,77 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Services
{
/// <summary>
/// Defines the logger interface used by the service
/// </summary>
public interface ILoggerService
{
/// <summary>
/// Get an instance of ILoggerService by a given type of logger
/// </summary>
/// <param name="type">The type of a logger to return</param>
/// <returns>An instance of ILoggerService</returns>
ILoggerService GetLoggerInstance(Type type);
/// <summary>
/// Logs a message at debug level.
/// </summary>
/// <param name="message">The message to be logged</param>
void LogDebug(string message);
/// <summary>
/// Logs a message at debug level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
void LogDebug(string messageFormat, params object[] messageParameters);
/// <summary>
/// Logs a message at info level.
/// </summary>
/// <param name="message">The message to be logged</param>
void LogInfo(string message);
/// <summary>
/// Logs a message at info level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
void LogInfo(string messageFormat, params object[] messageParameters);
/// <summary>
/// Logs a message at warning level.
/// </summary>
/// <param name="message">The message to be logged</param>
void LogWarn(string message);
/// <summary>
/// Logs a message at warning level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
void LogWarn(string messageFormat, params object[] messageParameters);
/// <summary>
/// Logs a fatal message.
/// </summary>
/// <param name="message">The message to be logged</param>
void LogFatal(string message);
/// <summary>
/// Logs a fatal message with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
void LogFatal(string messageFormat, params object[] messageParameters);
/// <summary>
/// Logs an error message.
/// </summary>
/// <param name="message">The message to be logged</param>
void LogError(string message);
/// <summary>
/// Logs an error message with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
void LogError(string messageFormat, params object[] messageParameters);
/// <summary>
/// Logs an exception
/// </summary>
/// <param name="e">The exception to be logged</param>
void LogException(Exception e);
}
}
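// Minimal sketch of a custom ILoggerService implementation (illustrative only; the class name
// ConsoleLoggerService is hypothetical and not part of this commit). It shows what a logger that
// satisfies the interface above and writes to the console could look like.
public class ConsoleLoggerService : ILoggerService
{
    private readonly Type type;

    public ConsoleLoggerService(Type type) { this.type = type; }

    // Returns a logger bound to the requesting type
    public ILoggerService GetLoggerInstance(Type type) { return new ConsoleLoggerService(type); }

    private void Write(string level, string message)
    {
        Console.WriteLine("[{0:u}] [{1}] [{2}] {3}", DateTime.UtcNow, level, type.Name, message);
    }

    public void LogDebug(string message) { Write("Debug", message); }
    public void LogDebug(string messageFormat, params object[] messageParameters) { Write("Debug", string.Format(messageFormat, messageParameters)); }
    public void LogInfo(string message) { Write("Info", message); }
    public void LogInfo(string messageFormat, params object[] messageParameters) { Write("Info", string.Format(messageFormat, messageParameters)); }
    public void LogWarn(string message) { Write("Warn", message); }
    public void LogWarn(string messageFormat, params object[] messageParameters) { Write("Warn", string.Format(messageFormat, messageParameters)); }
    public void LogFatal(string message) { Write("Fatal", message); }
    public void LogFatal(string messageFormat, params object[] messageParameters) { Write("Fatal", string.Format(messageFormat, messageParameters)); }
    public void LogError(string message) { Write("Error", message); }
    public void LogError(string messageFormat, params object[] messageParameters) { Write("Error", string.Format(messageFormat, messageParameters)); }
    public void LogException(Exception e) { Write("Exception", e.ToString()); }
}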

Просмотреть файл

@ -10,11 +10,17 @@ using log4net.Config;
namespace Microsoft.Spark.CSharp.Services
{
[ExcludeFromCodeCoverage] //unit test coverage not reqiured for logger service
/// <summary>
/// Represents a Log4Net logger.
/// </summary>
[ExcludeFromCodeCoverage] //unit test coverage not required for logger service
public class Log4NetLoggerService : ILoggerService
{
private readonly ILog logger;
private const string exceptionLogDelimiter = "*******************************************************************************************************************************";
/// <summary>
/// Gets an instance of the Log4Net logger
/// </summary>
public static Log4NetLoggerService Instance = new Log4NetLoggerService(typeof(Type));
static Log4NetLoggerService()
@ -22,37 +28,115 @@ namespace Microsoft.Spark.CSharp.Services
XmlConfigurator.Configure();
}
/// <summary>
/// Initializes an instance of Log4NetLoggerService with a specific type.
/// </summary>
/// <param name="type">The type of the logger</param>
public Log4NetLoggerService(Type type)
{
logger = LogManager.GetLogger(type);
log4net.GlobalContext.Properties["pid"] = Process.GetCurrentProcess().Id;
}
/// <summary>
/// Logs a message at debug level.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogDebug(string message)
{
logger.Debug(message);
}
/// <summary>
/// Logs a message at debug level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogDebug(string messageFormat, params object[] messageParameters)
{
logger.DebugFormat(messageFormat, messageParameters);
}
/// <summary>
/// Logs a message at info level.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogInfo(string message)
{
logger.Info(message);
}
/// <summary>
/// Logs a message at info level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogInfo(string messageFormat, params object[] messageParameters)
{
logger.InfoFormat(messageFormat, messageParameters);
}
/// <summary>
/// Logs a message at warning level.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogWarn(string message)
{
logger.Warn(message);
}
/// <summary>
/// Logs a message at warning level with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogWarn(string messageFormat, params object[] messageParameters)
{
logger.WarnFormat(messageFormat, messageParameters);
}
/// <summary>
/// Logs a fatal message.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogFatal(string message)
{
logger.Fatal(message);
}
/// <summary>
/// Logs a fatal message with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogFatal(string messageFormat, params object[] messageParameters)
{
logger.FatalFormat(messageFormat, messageParameters);
}
/// <summary>
/// Logs an error message.
/// </summary>
/// <param name="message">The message to be logged</param>
public void LogError(string message)
{
logger.Error(message);
}
/// <summary>
/// Logs an error message with a format string.
/// </summary>
/// <param name="messageFormat">The format string</param>
/// <param name="messageParameters">The array of arguments</param>
public void LogError(string messageFormat, params object[] messageParameters)
{
logger.ErrorFormat(messageFormat, messageParameters);
}
/// <summary>
/// Logs an exception
/// </summary>
/// <param name="e">The exception to be logged</param>
public void LogException(Exception e)
{
@ -92,7 +176,12 @@ namespace Microsoft.Spark.CSharp.Services
}
}
/// <summary>
/// Get an instance of ILoggerService by a given type of logger
/// </summary>
/// <param name="type">The type of a logger to return</param>
/// <returns>An instance of ILoggerService</returns>
public ILoggerService GetLoggerInstance(Type type)
{
return new Log4NetLoggerService(type);

Просмотреть файл

@ -12,11 +12,23 @@ namespace Microsoft.Spark.CSharp.Services
public class LoggerServiceFactory
{
private static ILoggerService loggerService = DefaultLoggerService.Instance;
/// <summary>
/// Overrides the existing logger with a given logger service instance
/// </summary>
/// <param name="loggerServiceOverride">The logger service instance used as the override</param>
public static void SetLoggerService(ILoggerService loggerServiceOverride)
{
loggerService = loggerServiceOverride;
var logger = GetLogger(typeof(LoggerServiceFactory));
logger.LogInfo("Logger service configured to use {0}", logger.GetType().Name);
}
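// Illustrative usage sketch (not part of this commit): switching the logger implementation at
// driver startup, before any Spark operations run. Program is a hypothetical driver class.
//   LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance);
//   var logger = LoggerServiceFactory.GetLogger(typeof(Program));
//   logger.LogInfo("Driver started");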
/// <summary>
/// Gets an instance of logger service for a given type.
/// </summary>
/// <param name="type">The type for which the logger is requested</param>
/// <returns>An instance of logger service</returns>
public static ILoggerService GetLogger(Type type)
{
return loggerService.GetLoggerInstance(type);

Просмотреть файл

@ -10,6 +10,9 @@ using Microsoft.Spark.CSharp.Interop;
namespace Microsoft.Spark.CSharp.Sql
{
/// <summary>
/// A column that will be computed based on the data in a DataFrame.
/// </summary>
public class Column
{
private readonly IColumnProxy columnProxy;
@ -27,81 +30,179 @@ namespace Microsoft.Spark.CSharp.Sql
this.columnProxy = columnProxy;
}
/// <summary>
/// The logical negation operator that negates its operand.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <returns>true if and only if its operand is false</returns>
public static Column operator !(Column self)
{
return new Column(self.columnProxy.FuncOp("not"));
}
/// <summary>
/// Negation of itself.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <returns>The negation of itself</returns>
public static Column operator -(Column self)
{
return new Column(self.columnProxy.FuncOp("negate"));
}
/// <summary>
/// Sum of this expression and another expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>The result of sum</returns>
public static Column operator +(Column self, object other)
{
return new Column(self.columnProxy.BinOp("plus", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// Subtraction of this expression and another expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>The result of subtraction</returns>
public static Column operator -(Column self, object other)
{
return new Column(self.columnProxy.BinOp("minus", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// Multiplication of this expression and another expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>The result of multiplication</returns>
public static Column operator *(Column self, object other)
{
return new Column(self.columnProxy.BinOp("multiply", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// Division of this expression by another expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>The result of division</returns>
public static Column operator /(Column self, object other)
{
return new Column(self.columnProxy.BinOp("divide", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// Modulo (a.k.a. remainder) expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>The remainder after dividing column self by other</returns>
public static Column operator %(Column self, object other)
{
return new Column(self.columnProxy.BinOp("mod", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// The equality operator returns true if the values of its operands are equal, false otherwise.
/// </summary>
/// <param name="self">The column self to compare</param>
/// <param name="other">The other object to compare</param>
/// <returns>true if the value of self is the same as the value of other; otherwise, false.</returns>
public static Column operator ==(Column self, object other)
{
return new Column(self.columnProxy.BinOp("equalTo", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// The inequality operator returns false if its operands are equal, true otherwise.
/// </summary>
/// <param name="self">The column self to compare</param>
/// <param name="other">The other object to compare</param>
/// <returns>true if the value of self is different from the value of other; otherwise, false.</returns>
public static Column operator !=(Column self, object other)
{
return new Column(self.columnProxy.BinOp("notEqual", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// The "less than" relational operator that returns true if the first operand
/// is less than the second, false otherwise.
/// </summary>
/// <param name="self">The column self to compare</param>
/// <param name="other">The other object to compare</param>
/// <returns>true if the value of self is less than the value of other; otherwise, false.</returns>
public static Column operator <(Column self, object other)
{
return new Column(self.columnProxy.BinOp("lt", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// The "less than or equal" relational operator that returns true if the first operand
/// is less than or equal to the second, false otherwise.
/// </summary>
/// <param name="self">The column self to compare</param>
/// <param name="other">The other object to compare</param>
/// <returns>true if the value of self is less than or equal to the value of other; otherwise, false.</returns>
public static Column operator <=(Column self, object other)
{
return new Column(self.columnProxy.BinOp("leq", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// The "greater than or equal" relational operator that returns true if the first operand
/// is greater than or equal to the second, false otherwise.
/// </summary>
/// <param name="self">The column self to compare</param>
/// <param name="other">The other object to compare</param>
/// <returns>true if the value of self is greater than or equal to the value of other; otherwise, false.</returns>
public static Column operator >=(Column self, object other)
{
return new Column(self.columnProxy.BinOp("geq", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// The "greater than" relational operator that returns true if the first operand
/// is greater than the second, false otherwise.
/// </summary>
/// <param name="self">The column self to compare</param>
/// <param name="other">The other object to compare</param>
/// <returns>true if the value of self is greater than the value of other; otherwise, false.</returns>
public static Column operator >(Column self, object other)
{
return new Column(self.columnProxy.BinOp("gt", (other is Column) ? ((Column)other).columnProxy : other));
}
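// Illustrative usage sketch (not part of this commit): the overloaded operators above let filter
// and sort expressions be written directly against columns. 'df' is a hypothetical DataFrame with
// an "age" column, and a Filter overload accepting a Column expression is assumed.
//   var adults = df.Filter(df["age"] >= 18);
//   var oldestFirst = adults.Sort(new[] { df["age"].Desc() });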
/// <summary>
/// Compute bitwise OR of this expression with another expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>false if and only if both its operands are false; otherwise, true</returns>
public static Column operator |(Column self, object other)
{
return new Column(self.columnProxy.BinOp("bitwiseOR", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// Compute bitwise AND of this expression with another expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>true if and only if both its operands are true; otherwise, false</returns>
public static Column operator &(Column self, object other)
{
return new Column(self.columnProxy.BinOp("bitwiseAND", (other is Column) ? ((Column)other).columnProxy : other));
}
/// <summary>
/// Compute bitwise XOR of this expression with another expression.
/// </summary>
/// <param name="self">The column self to compute</param>
/// <param name="other">The other object to compute</param>
/// <returns>true if and only if exactly one of its operands is true; otherwise, false</returns>
public static Column operator ^(Column self, object other)
{
return new Column(self.columnProxy.BinOp("bitwiseXOR", (other is Column) ? ((Column)other).columnProxy : other));
@ -167,20 +268,39 @@ namespace Microsoft.Spark.CSharp.Sql
return new Column(columnProxy.BinOp("endsWith", other.columnProxy));
}
/// <summary>
/// Returns a sort expression based on the ascending order.
/// </summary>
/// <returns>A column with ascending order</returns>
public Column Asc()
{
return new Column(columnProxy.UnaryOp("asc"));
}
/// <summary>
/// Returns a sort expression based on the descending order.
/// </summary>
/// <returns>A column with descending order</returns>
public Column Desc()
{
return new Column(columnProxy.UnaryOp("desc"));
}
/// <summary>
/// Returns this column aliased with a new name.
/// </summary>
/// <param name="alias">The name of alias</param>
/// <returns>A column aliased with the given name</returns>
public Column Alias(string alias)
{
return new Column(columnProxy.InvokeMethod("as", alias));
}
/// <summary>
/// Returns this column aliased with new names
/// </summary>
/// <param name="aliases">The array of names for aliases</param>
/// <returns>A column aliased with the given names</returns>
public Column Alias(string[] aliases)
{
return new Column(columnProxy.InvokeMethod("as", new object[] { aliases }));

Просмотреть файл

@ -7,6 +7,7 @@ using System.Globalization;
using System.Linq;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Sql
{
@ -18,6 +19,9 @@ namespace Microsoft.Spark.CSharp.Sql
[Serializable]
public class DataFrame
{
[NonSerialized]
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DataFrame));
[NonSerialized]
private readonly IDataFrameProxy dataFrameProxy;
[NonSerialized]
@ -33,6 +37,9 @@ namespace Microsoft.Spark.CSharp.Sql
[NonSerialized]
private readonly Random random = new Random();
/// <summary>
/// Represents the content of the DataFrame as an RDD of Rows.
/// </summary>
public RDD<Row> Rdd
{
get
@ -59,6 +66,9 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Returns true if the collect and take methods can be run locally (without any Spark executors).
/// </summary>
public bool IsLocal
{
get
@ -84,11 +94,18 @@ namespace Microsoft.Spark.CSharp.Sql
get { return dataFrameProxy; }
}
/// <summary>
/// Returns the schema of this DataFrame.
/// </summary>
public StructType Schema
{
get { return schema ?? (schema = new StructType(dataFrameProxy.GetSchema())); }
}
/// <summary>
/// Returns a column for a given column name.
/// </summary>
/// <param name="columnName">The name of column</param>
public Column this[string columnName]
{
get
@ -119,6 +136,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <returns>row count</returns>
public long Count()
{
logger.LogInfo("Calculating the number of rows in the dataframe");
return dataFrameProxy.Count();
}
@ -129,6 +147,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <param name="truncate">Indicates if strings more than 20 characters long will be truncated</param>
public void Show(int numberOfRows = 20, bool truncate = true)
{
logger.LogInfo("Writing {0} rows in the DataFrame to Console output", numberOfRows);
Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate));
}
@ -138,6 +157,7 @@ namespace Microsoft.Spark.CSharp.Sql
public void ShowSchema()
{
var nameTypeList = Schema.Fields.Select(structField => structField.SimpleString);
logger.LogInfo("Writing Schema to Console output");
Console.WriteLine(string.Join(", ", nameTypeList));
}
@ -641,19 +661,69 @@ namespace Microsoft.Spark.CSharp.Sql
}
if (ascending != null)
{
if(columns.Length != ascending.Length)
throw new ArgumentException("ascending should have the same length with columns");
var columnsWithOrder = new Column[columns.Length];
for (var i = 0; i < columns.Length; i++)
{
columnsWithOrder[i] = ascending[i] ? columns[i].Asc() : columns[i].Desc();
}
return new DataFrame(dataFrameProxy.Sort(columnsWithOrder.Select(c => c.ColumnProxy).ToArray()), sparkContext);
var sortedColumns = SortColumns(columns, ascending);
return new DataFrame(dataFrameProxy.Sort(sortedColumns.Select(c => c.ColumnProxy).ToArray()), sparkContext);
}
return new DataFrame(dataFrameProxy.Sort(columns.Select(c => c.ColumnProxy).ToArray()), sparkContext);
}
/// <summary>
/// Returns a new DataFrame with each partition sorted by the specified column(s).
/// Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
/// </summary>
/// <param name="columns">List of column names to sort by</param>
/// <param name="ascending">List of boolean to specify multiple sort orders for <paramref name="columns"/>, TRUE for ascending, FALSE for descending.
/// if not null, it will override the order specified by Column.Asc() or Column.Desc() in <paramref name="columns"/>.</param>
/// <returns>A new DataFrame sorted by the specified column(s)</returns>
public DataFrame SortWithinPartitions(string[] columns, bool[] ascending = null)
{
if (columns == null || columns.Length == 0)
{
throw new ArgumentException("should sort by at least one column.");
}
if (ascending != null)
{
var sortedColumns = SortColumns(columns.Select(c => this[c]).ToArray(), ascending);
return new DataFrame(dataFrameProxy.SortWithinPartitions(sortedColumns.Select(c => c.ColumnProxy).ToArray()), sparkContext);
}
return new DataFrame(dataFrameProxy.SortWithinPartitions(columns.Select(c => this[c].ColumnProxy).ToArray()), sparkContext);
}
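// Illustrative usage sketch (not part of this commit): per-partition sort by column names with an
// explicit sort order per column. 'logsDataFrame' is a hypothetical DataFrame with "loglevel" and
// "timestamp" columns.
//   var sorted = logsDataFrame.SortWithinPartitions(
//       new[] { "loglevel", "timestamp" }, new[] { true, false });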
/// <summary>
/// Returns a new DataFrame with each partition sorted by the specified column(s).
/// Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
/// </summary>
/// <param name="columns">List of Columns to sort by</param>
/// <param name="ascending">List of boolean to specify multiple sort orders for <paramref name="columns"/>, TRUE for ascending, FALSE for descending.
/// if not null, it will override the order specified by Column.Asc() or Column.Desc() in <paramref name="columns"/>.</param>
/// <returns>A new DataFrame sorted by the specified column(s)</returns>
public DataFrame SortWithinPartition(Column[] columns, bool[] ascending = null)
{
if (columns == null || columns.Length == 0)
{
throw new ArgumentException("should sort by at least one column.");
}
if (ascending != null)
{
var sortedColumns = SortColumns(columns, ascending);
return new DataFrame(dataFrameProxy.SortWithinPartitions(sortedColumns.Select(c => c.ColumnProxy).ToArray()), sparkContext);
}
return new DataFrame(dataFrameProxy.SortWithinPartitions(columns.Select(c => c.ColumnProxy).ToArray()), sparkContext);
}
private Column[] SortColumns(Column[] columns, bool[] ascending)
{
if (columns.Length != ascending.Length)
throw new ArgumentException("ascending should have the same length as columns");
var columnsWithOrder = new Column[columns.Length];
for (var i = 0; i < columns.Length; i++)
{
columnsWithOrder[i] = ascending[i] ? columns[i].Asc() : columns[i].Desc();
}
return columnsWithOrder;
}
/// <summary>
/// Returns a new DataFrame with an alias set.
/// Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, alias(self, alias)
@ -877,6 +947,32 @@ namespace Microsoft.Spark.CSharp.Sql
return new DataFrame(dataFrameProxy.Repartition(numPartitions), sparkContext);
}
/// <summary>
/// Returns a new DataFrame partitioned by the given partitioning columns into <paramref name="numPartitions"/> partitions. The resulting DataFrame is hash partitioned.
/// <param name="columns">The columns to partition by</param>
/// <param name="numPartitions">optional. If not specified, keep current partitions.</param>
/// </summary>
// Python API: https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py repartition(self, numPartitions)
public DataFrame Repartition(string[] columns, int numPartitions = 0)
{
return numPartitions == 0 ?
new DataFrame(dataFrameProxy.Repartition(columns.Select(c => this[c].ColumnProxy).ToArray()), sparkContext) :
new DataFrame(dataFrameProxy.Repartition(numPartitions, columns.Select(c => this[c].ColumnProxy).ToArray()), sparkContext);
}
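// Illustrative usage sketch (not part of this commit): hash partitioning by column, optionally
// changing the partition count. 'df' is a hypothetical DataFrame with a "datacenter" column.
//   var byDc = df.Repartition(new[] { "datacenter" });        // keeps the current partition count
//   var byDc10 = df.Repartition(new[] { "datacenter" }, 10);  // repartitions into 10 partitions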
/// <summary>
/// Returns a new DataFrame partitioned by the given partitioning columns into <paramref name="numPartitions"/> partitions. The resulting DataFrame is hash partitioned.
/// <param name="columns">The columns to partition by</param>
/// <param name="numPartitions">optional. If not specified, keep current partitions.</param>
/// </summary>
// Python API: https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py repartition(self, numPartitions)
public DataFrame Repartition(Column[] columns, int numPartitions = 0)
{
return numPartitions == 0 ?
new DataFrame(dataFrameProxy.Repartition(columns.Select(c => c.ColumnProxy).ToArray()), sparkContext) :
new DataFrame(dataFrameProxy.Repartition(numPartitions, columns.Select(c => c.ColumnProxy).ToArray()), sparkContext);
}
/// <summary>
/// Returns a new DataFrame by sampling a fraction of rows.
/// </summary>
@ -954,6 +1050,7 @@ namespace Microsoft.Spark.CSharp.Sql
// write(self)
public DataFrameWriter Write()
{
logger.LogInfo("Using DataFrameWriter to write output data to external data storage");
return new DataFrameWriter(dataFrameProxy.Write());
}
@ -1059,8 +1156,14 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// The type of join operation for DataFrame
/// </summary>
public class JoinType
{
/// <summary>
/// Get the string that represents a join type
/// </summary>
public string Value { get; private set; }
private JoinType(string value)
{
@ -1073,6 +1176,9 @@ namespace Microsoft.Spark.CSharp.Sql
private static readonly JoinType RightOuterJoinType = new JoinType("right_outer");
private static readonly JoinType LeftSemiJoinType = new JoinType("leftsemi");
/// <summary>
/// Inner join
/// </summary>
public static JoinType Inner
{
get
@ -1081,6 +1187,9 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Outer join
/// </summary>
public static JoinType Outer
{
get
@ -1089,6 +1198,9 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Left outer join
/// </summary>
public static JoinType LeftOuter
{
get
@ -1097,6 +1209,9 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Right outer join
/// </summary>
public static JoinType RightOuter
{
get
@ -1105,6 +1220,9 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Left semi join
/// </summary>
public static JoinType LeftSemi
{
get
@ -1114,6 +1232,9 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// A set of methods for aggregations on a DataFrame, created by DataFrame.groupBy.
/// </summary>
public class GroupedData
{
internal IGroupedDataProxy GroupedDataProxy
@ -1130,36 +1251,79 @@ namespace Microsoft.Spark.CSharp.Sql
this.dataFrame = dataFrame;
}
/// <summary>
/// Compute aggregates by specifying a dictionary from column name to aggregate methods.
/// The available aggregate methods are avg, max, min, sum, count.
/// </summary>
/// <param name="columnNameAggFunctionDictionary">The dictionary of column name to aggregate method</param>
/// <returns>The DataFrame object that contains the grouping columns.</returns>
public DataFrame Agg(Dictionary<string, string> columnNameAggFunctionDictionary)
{
return new DataFrame(dataFrame.DataFrameProxy.Agg(groupedDataProxy, columnNameAggFunctionDictionary), dataFrame.SparkContext);
}
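// Illustrative usage sketch (not part of this commit): dictionary-based aggregation on grouped
// data. 'df' is a hypothetical DataFrame with "datacenter" and "latency" columns.
//   var maxLatencyByDc = df.GroupBy("datacenter")
//       .Agg(new Dictionary<string, string> { { "latency", "max" } });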
/// <summary>
/// Count the number of rows for each group.
/// </summary>
/// <returns>The DataFrame object that contains the grouping columns.</returns>
public DataFrame Count()
{
return new DataFrame(groupedDataProxy.Count(), dataFrame.SparkContext);
}
/// <summary>
/// Compute the average value for each numeric column for each group.
/// This is an alias for avg.
/// When specified columns are given, only compute the average values for them.
/// </summary>
/// <param name="columns">The name of columns to be computed.</param>
/// <returns>The DataFrame object that contains the grouping columns.</returns>
public DataFrame Mean(params string[] columns)
{
return new DataFrame(groupedDataProxy.Mean(columns), dataFrame.SparkContext);
}
/// <summary>
/// Compute the max value for each numeric column for each group.
/// When specified columns are given, only compute the max values for them.
/// </summary>
/// <param name="columns"> The name of columns to be computed.</param>
/// <returns>The DataFrame object that contains the grouping columns.</returns>
public DataFrame Max(params string[] columns)
{
return new DataFrame(groupedDataProxy.Max(columns), dataFrame.SparkContext);
}
/// <summary>
/// Compute the min value for each numeric column for each group.
/// </summary>
/// <param name="columns">
/// The name of columns to be computed. When specified columns are
/// given, only compute the min values for them.
/// </param>
/// <returns>The DataFrame object that contains the grouping columns.</returns>
public DataFrame Min(params string[] columns)
{
return new DataFrame(groupedDataProxy.Min(columns), dataFrame.SparkContext);
}
/// <summary>
/// Compute the mean value for each numeric column for each group.
/// When specified columns are given, only compute the mean values for them.
/// </summary>
/// <param name="columns">The name of columns to be computed</param>
/// <returns>The DataFrame object that contains the grouping columns.</returns>
public DataFrame Avg(params string[] columns)
{
return new DataFrame(groupedDataProxy.Avg(columns), dataFrame.SparkContext);
}
/// <summary>
/// Compute the sum for each numeric column for each group.
/// When specified columns are given, only compute the sum for them.
/// </summary>
/// <param name="columns">The name of columns to be computed</param>
/// <returns>The DataFrame object that contains the grouping columns.</returns>
public DataFrame Sum(params string[] columns)
{
return new DataFrame(groupedDataProxy.Sum(columns), dataFrame.SparkContext);

Просмотреть файл

@ -5,6 +5,7 @@ using System;
using System.Collections.Generic;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Sql
{
@ -14,6 +15,8 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public class DataFrameReader
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DataFrameReader));
private readonly IDataFrameReaderProxy dataFrameReaderProxy;
private readonly SparkContext sparkContext;
@ -27,6 +30,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public DataFrameReader Format(string source)
{
logger.LogInfo("Input data source format for the reader is '{0}'", source);
dataFrameReaderProxy.Format(source);
return this;
}
@ -48,6 +52,7 @@ namespace Microsoft.Spark.CSharp.Sql
public DataFrameReader Option(string key, string value)
{
dataFrameReaderProxy.Options(new Dictionary<string, string>(){{key, value}});
logger.LogInfo("Input key-value option for the data source is {0}={1}", key, value);
return this;
}
@ -75,6 +80,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public DataFrame Load()
{
logger.LogInfo("Loading DataFrame using the reader");
return new DataFrame(dataFrameReaderProxy.Load(), sparkContext);
}
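// Illustrative usage sketch (not part of this commit): building a reader fluently and loading.
// 'sqlContext' is a hypothetical SqlContext instance; the spark-csv format name and the "header"
// option are assumptions used only for illustration.
//   var df = sqlContext.Read()
//       .Format("com.databricks.spark.csv")
//       .Option("header", "true")
//       .Load("hdfs:///data/input.csv");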
@ -84,6 +90,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public DataFrame Jdbc(string url, string table, Dictionary<String, String> properties)
{
logger.LogInfo("Constructing DataFrame using JDBC source. Url={0}, tableName={1}", url, table);
return new DataFrame(dataFrameReaderProxy.Jdbc(url, table, properties), sparkContext);
}
@ -106,6 +113,7 @@ namespace Microsoft.Spark.CSharp.Sql
public DataFrame Jdbc(string url, string table, string columnName, string lowerBound, string upperBound,
int numPartitions, Dictionary<String, String> connectionProperties)
{
logger.LogInfo("Constructing DataFrame using JDBC source. Url={0}, tableName={1}, columnName={2}", url, table, columnName);
return new DataFrame(dataFrameReaderProxy.Jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, connectionProperties), sparkContext);
}
@ -125,6 +133,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// Normally at least a "user" and "password" property should be included.</param>
public DataFrame Jdbc(string url, string table, string[] predicates, Dictionary<String, String> connectionProperties)
{
logger.LogInfo("Constructing DataFrame using JDBC source. Url={0}, table={1}", url, table);
return new DataFrame(dataFrameReaderProxy.Jdbc(url, table, predicates, connectionProperties), sparkContext);
}
@ -137,6 +146,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <param name="path">input path</param>
public DataFrame Json(string path)
{
logger.LogInfo("Constructing DataFrame using JSON source {0}", path);
return Format("json").Load(path);
}
@ -146,6 +156,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public DataFrame Parquet(params string[] path)
{
logger.LogInfo("Constructing DataFrame using Parquet source {0}", string.Join(";", path));
return new DataFrame(dataFrameReaderProxy.Parquet(path), sparkContext);
}
}

Просмотреть файл

@ -3,6 +3,7 @@
using System.Collections.Generic;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Sql
{
@ -14,6 +15,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public class DataFrameWriter
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DataFrameWriter));
internal IDataFrameWriterProxy DataFrameWriterProxy
{
get { return dataFrameWriterProxy; }
@ -56,6 +58,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public DataFrameWriter Format(string source)
{
logger.LogInfo("Output data storage format for the writer is '{0}'", source);
dataFrameWriterProxy.Format(source);
return this;
}
@ -66,6 +69,7 @@ namespace Microsoft.Spark.CSharp.Sql
public DataFrameWriter Option(string key, string value)
{
var options = new Dictionary<string, string>() { { key, value } };
logger.LogInfo("Output key-value option for the external data storage is {0}={1}", key, value);
return Options(options);
}
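// Illustrative usage sketch (not part of this commit): configuring the writer fluently before
// saving. The Save method and the "compression" option name are assumptions for illustration only.
//   df.Write()
//     .Format("parquet")
//     .Option("compression", "snappy")
//     .Save("hdfs:///data/output");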

Просмотреть файл

@ -19,110 +19,272 @@ namespace Microsoft.Spark.CSharp.Sql
public static class Functions
{
#region functions
/// <summary>
/// Creates a Column of any literal value.
/// </summary>
/// <param name="column">The given literal value</param>
/// <returns>A new Column is created to represent the literal value</returns>
public static Column Lit(object column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("lit", column));
}
/// <summary>
/// Returns a Column based on the given column name.
/// </summary>
/// <param name="colName">The name of column specified</param>
/// <returns>The column for the given name</returns>
public static Column Col(string colName)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("col", colName));
}
/// <summary>
/// Returns a Column based on the given column name.
/// </summary>
/// <param name="colName">The name of column specified</param>
/// <returns>The column for the given name</returns>
public static Column Column(string colName)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("column", colName));
}
/// <summary>
/// Returns a sort expression based on ascending order of the column.
/// </summary>
/// <param name="columnName">The name of column specified</param>
/// <returns>The column with ascending order</returns>
public static Column Asc(string columnName)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("asc", columnName));
}
/// <summary>
/// Returns a sort expression based on the descending order of the column.
/// </summary>
/// <param name="columnName">The name of column specified</param>
/// <returns>the column with descending order</returns>
public static Column Desc(string columnName)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("desc", columnName));
}
/// <summary>
/// Converts a string column to upper case.
/// </summary>
/// <param name="column">The string column specified</param>
/// <returns>The string column in upper case</returns>
public static Column Upper(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("upper", column.ColumnProxy));
}
/// <summary>
/// Converts a string column to lower case.
/// </summary>
/// <param name="column">The string column specified</param>
/// <returns>The string column in lower case</returns>
public static Column Lower(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("lower", column.ColumnProxy));
}
/// <summary>
/// Computes the square root of the specified float column.
/// </summary>
/// <param name="column">The float column</param>
/// <returns>The square root of the specified float column.</returns>
public static Column Sqrt(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("sqrt", column.ColumnProxy));
}
/// <summary>
/// Computes the absolute value.
/// </summary>
/// <param name="column">The column to compute</param>
/// <returns>The new column represents the absolute value of the given column</returns>
public static Column Abs(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("abs", column.ColumnProxy));
}
/// <summary>
/// Returns the maximum value of the expression in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the maximum value</returns>
public static Column Max(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("max", column.ColumnProxy));
}
/// <summary>
/// Returns the minimum value of the expression in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the minimum value</returns>
public static Column Min(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("min", column.ColumnProxy));
}
/// <summary>
/// Returns the first value in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the first value</returns>
public static Column First(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("first", column.ColumnProxy));
}
/// <summary>
/// Returns the last value in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the last value</returns>
public static Column Last(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("last", column.ColumnProxy));
}
/// <summary>
/// Returns the number of items in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the count value</returns>
public static Column Count(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("count", column.ColumnProxy));
}
/// <summary>
/// Returns the sum of all values in the expression.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the sum</returns>
public static Column Sum(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("sum", column.ColumnProxy));
}
/// <summary>
/// Returns the average of the values in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the average</returns>
public static Column Avg(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("avg", column.ColumnProxy));
}
/// <summary>
/// Returns the average of the values in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the average</returns>
public static Column Mean(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("mean", column.ColumnProxy));
}
/// <summary>
/// Returns the sum of distinct values in the expression.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column represents the sum of distinct values </returns>
public static Column SumDistinct(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("sumDistinct", column.ColumnProxy));
}
/// <summary>
/// Creates a new array column. The input columns must all have the same data type.
/// </summary>
/// <param name="columns">The given columns</param>
/// <returns>The new array column</returns>
public static Column Array(params Column[] columns)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("array", columns.Select(x => x.ColumnProxy)));
}
/// <summary>
/// Returns the first column that is not null, or null if all inputs are null.
/// </summary>
/// <param name="columns">The given columns</param>
/// <returns>The first column that is not null</returns>
public static Column Coalesce(params Column[] columns)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("coalesce", columns.Select(x => x.ColumnProxy)));
}
/// <summary>
/// Returns the number of distinct items in a group.
/// </summary>
/// <param name="columns">The given columns</param>
/// <returns>The new column represents the number of distinct items</returns>
public static Column CountDistinct(params Column[] columns)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("countDistinct", columns.Select(x => x.ColumnProxy)));
}
/// <summary>
/// Creates a new struct column.
/// </summary>
/// <param name="columns">The given columns</param>
/// <returns>The new struct column</returns>
public static Column Struct(params Column[] columns)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("struct", columns.Select(x => x.ColumnProxy)));
}
/// <summary>
/// Returns the approximate number of distinct items in a group
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the approximate number of distinct items</returns>
public static Column ApproxCountDistinct(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("approxCountDistinct", column.ColumnProxy));
}
/// <summary>
/// Creates a new row for each element in the given array or map column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The new column for each element in the given array or map column</returns>
public static Column Explode(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("explode", column.ColumnProxy));
}
/// <summary>
/// Generate a random column with i.i.d. samples from U[0.0, 1.0].
/// </summary>
/// <param name="seed">The long integer as seed</param>
/// <returns>A random column with i.i.d. samples from U[0.0, 1.0]. </returns>
public static Column Rand(long seed)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("rand", seed));
}
/// <summary>
/// Generate a column with i.i.d. samples from the standard normal distribution.
/// </summary>
/// <param name="seed">The long integer as seed</param>
/// <returns>A column with i.i.d. samples from the standard normal distribution</returns>
public static Column Randn(long seed)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("randn", seed));
}
/// <summary>
/// Returns the ntile group id (from 1 to n inclusive) in an ordered window partition.
/// This is equivalent to the NTILE function in SQL.
/// </summary>
/// <param name="n">The given number</param>
/// <returns>The ntile group id</returns>
public static Column Ntile(int n)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("ntile", n));
@ -130,100 +292,221 @@ namespace Microsoft.Spark.CSharp.Sql
#endregion
#region unary math functions
/// <summary>
/// Computes the cosine inverse of the given column; the returned angle is in the range 0.0 through pi.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the cosine inverse</returns>
public static Column Acos(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("acos", column.ColumnProxy));
}
/// <summary>
/// Computes the sine inverse of the given column; the returned angle is in the range -pi/2 through pi/2.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the sine inverse</returns>
public static Column Asin(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("asin", column.ColumnProxy));
}
/// <summary>
/// Computes the tangent inverse of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the tangent inverse</returns>
public static Column Atan(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("atan", column.ColumnProxy));
}
/// <summary>
/// Computes the cube-root of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the cube-root</returns>
public static Column Cbrt(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("cbrt", column.ColumnProxy));
}
/// <summary>
/// Computes the ceiling of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the ceiling</returns>
public static Column Ceil(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("ceil", column.ColumnProxy));
}
/// <summary>
/// Computes the cosine of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the cosine</returns>
public static Column Cos(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("cos", column.ColumnProxy));
}
/// <summary>
/// Computes the hyperbolic cosine of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the hyperbolic cosine</returns>
public static Column Cosh(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("cosh", column.ColumnProxy));
}
/// <summary>
/// Computes the exponential of the given value.
/// Computes the exponential of the given column.
/// </summary>
/// <param name="column"></param>
/// <returns></returns>
/// <param name="column">The given column</param>
/// <returns>The column represents the exponential</returns>
public static Column Exp(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("exp", column.ColumnProxy));
}
/// <summary>
/// Computes the exponential of the given value minus one.
/// Computes the exponential of the given column minus one.
/// </summary>
/// <param name="column"></param>
/// <returns></returns>
/// <param name="column">The given column</param>
/// <returns>The column represents the exponential</returns>
public static Column Expm1(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("expm1", column.ColumnProxy));
}
/// <summary>
/// Computes the floor of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the floor</returns>
public static Column Floor(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("floor", column.ColumnProxy));
}
/// <summary>
/// Computes the natural logarithm of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the natural logarithm</returns>
public static Column Log(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("log", column.ColumnProxy));
}
/// <summary>
/// Computes the logarithm of the given column in base 10.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the logarithm</returns>
public static Column Log10(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("log10", column.ColumnProxy));
}
/// <summary>
/// Computes the natural logarithm of the given column plus one.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the logarithm</returns>
public static Column Log1p(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("log1p", column.ColumnProxy));
}
/// <summary>
/// Returns the double value that is closest in value to the argument and is equal to a mathematical integer.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the double value</returns>
public static Column Rint(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("rint", column.ColumnProxy));
}
/// <summary>
/// Computes the signum of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the signum</returns>
public static Column Signum(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("signum", column.ColumnProxy));
}
/// <summary>
/// Computes the sine of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the sine</returns>
public static Column Sin(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("sin", column.ColumnProxy));
}
/// <summary>
/// Computes the hyperbolic sine of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the hyperbolic sine</returns>
public static Column Sinh(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("sinh", column.ColumnProxy));
}
/// <summary>
/// Computes the tangent of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the tangent</returns>
public static Column Tan(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("tan", column.ColumnProxy));
}
/// <summary>
/// Computes the hyperbolic tangent of the given column.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the hyperbolic tangent</returns>
public static Column Tanh(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("tanh", column.ColumnProxy));
}
/// <summary>
/// Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the degrees</returns>
public static Column ToDegrees(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("toDegrees", column.ColumnProxy));
}
/// <summary>
/// Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column represents the radians</returns>
public static Column ToRadians(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("toRadians", column.ColumnProxy));
}
/// <summary>
/// Computes bitwise NOT.
/// </summary>
/// <param name="column">The given column</param>
/// <returns>The column of bitwise NOT result</returns>
public static Column BitwiseNOT(Column column)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateFunction("bitwiseNOT", column.ColumnProxy));
@ -231,46 +514,122 @@ namespace Microsoft.Spark.CSharp.Sql
#endregion
#region binary math functions
/// <summary>
/// Returns the angle theta from the conversion of rectangular coordinates (x, y) to polar coordinates (r, theta).
/// </summary>
/// <param name="leftColumn">The left column</param>
/// <param name="rightColumn">The right column</param>
/// <returns>The column of the result</returns>
public static Column Atan2(Column leftColumn, Column rightColumn)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("atan2", leftColumn.ColumnProxy, rightColumn.ColumnProxy));
}
/// <summary>
/// Computes sqrt(a^2 + b^2) without intermediate overflow or underflow.
/// </summary>
/// <param name="leftColumn">The left column</param>
/// <param name="rightColumn">The right column</param>
/// <returns>The column of the result</returns>
public static Column Hypot(Column leftColumn, Column rightColumn)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("hypot", leftColumn.ColumnProxy, rightColumn.ColumnProxy));
}
/// <summary>
/// Computes sqrt(a^2 + b^2) without intermediate overflow or underflow.
/// </summary>
/// <param name="leftColumn">The left column</param>
/// <param name="rightValue">The right column</param>
/// <returns>The column of the result</returns>
public static Column Hypot(Column leftColumn, double rightValue)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("hypot", leftColumn.ColumnProxy, rightValue));
}
/// <summary>
/// Computes sqrt(a^2 + b^2) without intermediate overflow or underflow.
/// </summary>
/// <param name="leftValue">The left value</param>
/// <param name="rightColumn">The right column</param>
/// <returns>The column of the result</returns>
public static Column Hypot(double leftValue, Column rightColumn)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("hypot", leftValue, rightColumn.ColumnProxy));
}
/// <summary>
/// Returns the value of the first argument raised to the power of the second argument.
/// </summary>
/// <param name="leftColumn">The left column</param>
/// <param name="rightColumn">The right column</param>
/// <returns>The column of the result</returns>
public static Column Pow(Column leftColumn, Column rightColumn)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("pow", leftColumn.ColumnProxy, rightColumn.ColumnProxy));
}
/// <summary>
/// Returns the value of the first argument raised to the power of the second argument.
/// </summary>
/// <param name="leftColumn">The left column</param>
/// <param name="rightValue">The right value</param>
/// <returns>The column of the result</returns>
public static Column Pow(Column leftColumn, double rightValue)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("pow", leftColumn.ColumnProxy, rightValue));
}
/// <summary>
/// Returns the value of the first argument raised to the power of the second argument.
/// </summary>
/// <param name="leftValue">The left value</param>
/// <param name="rightColumn">The right column</param>
/// <returns>The column of the result</returns>
public static Column Pow(double leftValue, Column rightColumn)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("pow", leftValue, rightColumn.ColumnProxy));
}
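// Illustrative usage sketch (an assumption, not part of the original source): combining the binary
// math functions above. Assumes an existing DataFrame 'df' with numeric columns "x" and "y", and that
// the DataFrame indexer and a Select overload accept Column expressions:
//   var polar = df.Select(Functions.Hypot(df["x"], df["y"]), Functions.Atan2(df["y"], df["x"]));
//   var squared = df.Select(Functions.Pow(df["x"], 2.0));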
/// <summary>
/// Returns the approximate number of distinct items in a group.
/// </summary>
/// <param name="column">The given column</param>
/// <param name="rsd">The rsd</param>
/// <returns>The column of the result</returns>
public static Column ApproxCountDistinct(Column column, double rsd)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("approxCountDistinct", column, rsd));
}
/// <summary>
/// Evaluates a list of conditions and returns one of multiple possible result expressions.
/// </summary>
/// <param name="condition">The given column of condition</param>
/// <param name="value">The value of condition</param>
/// <returns>The column of the result</returns>
public static Column When(Column condition, object value)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("when", condition, value));
}
/// <summary>
/// Returns the value that is offset rows before the current row, and null if there are fewer than offset rows before the current row.
/// </summary>
/// <param name="column">The given column</param>
/// <param name="offset">The offset of the given column</param>
/// <returns>The column of the result</returns>
public static Column Lag(Column column, int offset)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("lag", column, offset));
}
/// <summary>
/// Returns the value that is offset rows after the current row, and null if there are fewer than offset rows after the current row.
/// </summary>
/// <param name="column">The given column</param>
/// <param name="offset">The offset of the given column</param>
/// <returns>The column of the result</returns>
public static Column Lead(Column column, int offset)
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateBinaryMathFunction("lead", column, offset));
@ -278,38 +637,83 @@ namespace Microsoft.Spark.CSharp.Sql
#endregion
#region window functions
/// <summary>
/// Returns a sequential number starting at 1 within a window partition.
/// </summary>
/// <returns>The column of the result</returns>
public static Column RowNumber()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("row_number"));
}
/// <summary>
/// Returns the rank of rows within a window partition, without any gaps.
/// </summary>
/// <returns>The column of the result</returns>
public static Column DenseRank()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("dense_rank"));
}
/// <summary>
/// Returns the rank of rows within a window partition.
/// </summary>
/// <returns>The column of the result</returns>
public static Column Rank()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("rank"));
}
/// <summary>
/// Returns the cumulative distribution of values within a window partition
/// </summary>
/// <returns>The column of the result</returns>
public static Column CumeDist()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("cume_dist"));
}
/// <summary>
/// Returns the relative rank (i.e. percentile) of rows within a window partition.
/// </summary>
/// <returns>The column of the result</returns>
public static Column PercentRank()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("percent_rank"));
}
/// <summary>
/// A column expression that generates monotonically increasing 64-bit integers.
/// </summary>
/// <returns>The column of the result</returns>
public static Column MonotonicallyIncreasingId()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("monotonically_increasing_id"));
}
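// Illustrative usage sketch (an assumption, not part of the original source): tagging each row with a
// unique 64-bit id. Assumes a DataFrame 'df' and a Select overload that accepts Column expressions:
//   var withId = df.Select(df["name"], Functions.MonotonicallyIncreasingId());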
/// <summary>
/// Partition ID of the Spark task.
/// Note that this is nondeterministic because it depends on data partitioning and task scheduling.
/// </summary>
/// <returns>The column of the result</returns>
public static Column SparkPartitionId()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("spark_partition_id"));
}
/// <summary>
/// Generate a random column with i.i.d. samples from U[0.0, 1.0].
/// </summary>
/// <returns>The column of the result</returns>
public static Column Rand()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("rand"));
}
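// Illustrative usage sketch (an assumption, not part of the original source): adding a random column,
// e.g. as a basis for sampling. Assumes a DataFrame 'df' and a Select overload accepting Column expressions:
//   var withRand = df.Select(df["id"], Functions.Rand());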
/// <summary>
/// Generate a column with i.i.d. samples from the standard normal distribution.
/// </summary>
/// <returns>The column of the result</returns>
public static Column Randn()
{
return new Column(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateWindowFunction("randn"));
@ -317,46 +721,188 @@ namespace Microsoft.Spark.CSharp.Sql
#endregion
#region udf
/// <summary>
/// Defines a function of 0 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column> Udf<RT>(Func<RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT>(f).Execute).Execute0;
}
/// <summary>
/// Defines a function of 1 argument as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column> Udf<RT, A1>(Func<A1, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1>(f).Execute).Execute1;
}
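// Illustrative usage sketch (an assumption, not part of the original source): a one-argument UDF applied
// in a projection. Assumes a DataFrame 'df' with a string column "name" and a Select overload that
// accepts Column expressions:
//   var strLen = Functions.Udf<int, string>(s => s.Length);
//   var nameLengths = df.Select(strLen(df["name"]));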
/// <summary>
/// Defines a function of 2 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column> Udf<RT, A1, A2>(Func<A1, A2, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2>(f).Execute).Execute2;
}
/// <summary>
/// Defines a function of 3 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column> Udf<RT, A1, A2, A3>(Func<A1, A2, A3, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3>(f).Execute).Execute3;
}
/// <summary>
/// Defines a function of 4 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <typeparam name="A4">The 4th arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column, Column> Udf<RT, A1, A2, A3, A4>(Func<A1, A2, A3, A4, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3, A4>(f).Execute).Execute4;
}
/// <summary>
/// Defines a function of 5 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <typeparam name="A4">The 4th arguement of the given function</typeparam>
/// <typeparam name="A5">The 5th arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column, Column, Column> Udf<RT, A1, A2, A3, A4, A5>(Func<A1, A2, A3, A4, A5, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3, A4, A5>(f).Execute).Execute5;
}
/// <summary>
/// Defines a function of 6 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <typeparam name="A4">The 4th arguement of the given function</typeparam>
/// <typeparam name="A5">The 5th arguement of the given function</typeparam>
/// <typeparam name="A6">The 6th arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column, Column, Column, Column> Udf<RT, A1, A2, A3, A4, A5, A6>(Func<A1, A2, A3, A4, A5, A6, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3, A4, A5, A6>(f).Execute).Execute6;
}
/// <summary>
/// Defines a function of 7 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <typeparam name="A4">The 4th arguement of the given function</typeparam>
/// <typeparam name="A5">The 5th arguement of the given function</typeparam>
/// <typeparam name="A6">The 6th arguement of the given function</typeparam>
/// <typeparam name="A7">The 7th arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column, Column, Column, Column, Column> Udf<RT, A1, A2, A3, A4, A5, A6, A7>(Func<A1, A2, A3, A4, A5, A6, A7, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7>(f).Execute).Execute7;
}
/// <summary>
/// Defines a function of 8 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <typeparam name="A4">The 4th arguement of the given function</typeparam>
/// <typeparam name="A5">The 5th arguement of the given function</typeparam>
/// <typeparam name="A6">The 6th arguement of the given function</typeparam>
/// <typeparam name="A7">The 7th arguement of the given function</typeparam>
/// <typeparam name="A8">The 8th arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column, Column, Column, Column, Column, Column> Udf<RT, A1, A2, A3, A4, A5, A6, A7, A8>(Func<A1, A2, A3, A4, A5, A6, A7, A8, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7, A8>(f).Execute).Execute8;
}
/// <summary>
/// Defines a function of 9 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <typeparam name="A4">The 4th arguement of the given function</typeparam>
/// <typeparam name="A5">The 5th arguement of the given function</typeparam>
/// <typeparam name="A6">The 6th arguement of the given function</typeparam>
/// <typeparam name="A7">The 7th arguement of the given function</typeparam>
/// <typeparam name="A8">The 8th arguement of the given function</typeparam>
/// <typeparam name="A9">The 9th arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column, Column, Column, Column, Column, Column, Column> Udf<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9>(Func<A1, A2, A3, A4, A5, A6, A7, A8, A9, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9>(f).Execute).Execute9;
}
/// <summary>
/// Defines a function of 10 arguments as a user-defined function (UDF).
/// The data types are automatically inferred based on the function's signature.
/// </summary>
/// <param name="f">The given function</param>
/// <typeparam name="RT">The return type of the given function</typeparam>
/// <typeparam name="A1">The 1st arguement of the given function</typeparam>
/// <typeparam name="A2">The 2nd arguement of the given function</typeparam>
/// <typeparam name="A3">The 3rd arguement of the given function</typeparam>
/// <typeparam name="A4">The 4th arguement of the given function</typeparam>
/// <typeparam name="A5">The 5th arguement of the given function</typeparam>
/// <typeparam name="A6">The 6th arguement of the given function</typeparam>
/// <typeparam name="A7">The 7th arguement of the given function</typeparam>
/// <typeparam name="A8">The 8th arguement of the given function</typeparam>
/// <typeparam name="A9">The 9th arguement of the given function</typeparam>
/// <typeparam name="A10">The 10th arguement of the given function</typeparam>
/// <returns>The new user-defined function</returns>
public static Func<Column, Column, Column, Column, Column, Column, Column, Column, Column, Column, Column> Udf<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>(Func<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT> f)
{
return new UserDefinedFunction<RT>(new UdfHelper<RT, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>(f).Execute).Execute10;

View file

@ -0,0 +1,42 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Proxy;
namespace Microsoft.Spark.CSharp.Sql
{
/// <summary>
/// A variant of Spark SQL that integrates with data stored in Hive.
/// Configuration for Hive is read from hive-site.xml on the classpath.
/// It supports running both SQL and HiveQL commands.
/// </summary>
public class HiveContext : SqlContext
{
/// <summary>
/// Creates a HiveContext
/// </summary>
/// <param name="sparkContext"></param>
public HiveContext(SparkContext sparkContext)
: base(sparkContext, sparkContext.SparkContextProxy.CreateHiveContext())
{
}
internal HiveContext(SparkContext sparkContext, ISqlContextProxy sqlContextProxy)
: base(sparkContext, sqlContextProxy)
{
}
/// <summary>
/// Invalidate and refresh all the cached metadata of the given table.
/// For performance reasons, Spark SQL or the external data source library it uses
/// might cache certain metadata about a table, such as the location of blocks.
/// When those change outside of Spark SQL, users should call this function to invalidate the cache.
/// </summary>
/// <param name="tableName"></param>
public void RefreshTable(string tableName)
{
SqlContextProxy.RefreshTable(tableName);
}
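// Illustrative usage sketch (an assumption, not part of the original source): querying a hypothetical
// Hive table 'logs' through the inherited Sql() method and refreshing its cached metadata after the
// underlying files change outside of Spark SQL:
//   var hiveContext = new HiveContext(sparkContext);
//   var recentLogs = hiveContext.Sql("SELECT * FROM logs LIMIT 10");
//   hiveContext.RefreshTable("logs");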
}
}

View file

@ -1,13 +1,15 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Threading;
using Razorvine.Pickle;
namespace Microsoft.Spark.CSharp.Sql
{
/// <summary>
/// Used by Unpickler to unpickle pickled objects. It is also used to construct a Row (C# representation of pickled objects).
/// Note this implementation is not ThreadSafe. Collect or RDD conversion where unpickling is done is not expected to be multithreaded.
/// </summary>
public class RowConstructor : IObjectConstructor
{
@ -16,11 +18,13 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Schema of the DataFrame currently being processed
/// </summary>
[ThreadStatic] // thread safety is needed when running in the C# worker process
private static string currentSchema;
/// <summary>
/// Indicates if Schema is already set during construction of this type
/// </summary>
[ThreadStatic] // thread safety is needed when running in the C# worker process
private static bool isCurrentSchemaSet;
/// <summary>
@ -33,6 +37,10 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
internal string Schema;
/// <summary>
/// Returns a string that represents the current object.
/// </summary>
/// <returns>A string that represents the current object.</returns>
public override string ToString()
{
return string.Format("{{{0}}}", string.Join(",", Values));

View file

@ -40,6 +40,11 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public static class SaveModeExtensions
{
/// <summary>
/// Gets the string for the value of SaveMode
/// </summary>
/// <param name="mode">The given SaveMode</param>
/// <returns>The string that represents the given SaveMode</returns>
public static string GetStringValue(this SaveMode mode)
{
switch (mode)

View file

@ -5,6 +5,7 @@ using System;
using System.Collections.Generic;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Sql
{
@ -14,13 +15,77 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public class SqlContext
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SqlContext));
private readonly ISqlContextProxy sqlContextProxy;
private readonly SparkContext sparkContext;
internal ISqlContextProxy SqlContextProxy { get { return sqlContextProxy; } }
private static SqlContext instance;
/// <summary>
/// Creates a SqlContext
/// </summary>
/// <param name="sparkContext"></param>
public SqlContext(SparkContext sparkContext)
{
this.sparkContext = sparkContext;
sqlContextProxy = sparkContext.SparkContextProxy.CreateSqlContext();
if (instance == null) instance = this;
}
internal SqlContext(SparkContext sparkContext, ISqlContextProxy sqlContextProxy)
{
this.sparkContext = sparkContext;
this.sqlContextProxy = sqlContextProxy;
if (instance == null) instance = this;
}
/// <summary>
/// Get the existing SQLContext or create a new one with the given SparkContext.
/// </summary>
/// <param name="sparkContext"></param>
/// <returns></returns>
public static SqlContext GetOrCreate(SparkContext sparkContext)
{
if (instance == null)
{
return new SqlContext(sparkContext);
}
return instance;
}
/// <summary>
/// Returns a new SQLContext as a new session, which has separate SQLConf,
/// registered temporary tables and UDFs, but shares the SparkContext and table cache.
/// </summary>
/// <returns></returns>
public SqlContext NewSession()
{
var newSessionProxy = sqlContextProxy.NewSession();
return new SqlContext(this.sparkContext, newSessionProxy);
}
/// <summary>
/// Returns the value of Spark SQL configuration property for the given key.
/// If the key is not set, returns defaultValue.
/// </summary>
/// <param name="key"></param>
/// <param name="defaultValue"></param>
/// <returns></returns>
public string GetConf(string key, string defaultValue)
{
return sqlContextProxy.GetConf(key, defaultValue);
}
/// <summary>
/// Sets the given Spark SQL configuration property.
/// </summary>
/// <param name="key"></param>
/// <param name="value"></param>
public void SetConf(string key, string value)
{
sqlContextProxy.SetConf(key, value);
}
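// Illustrative usage sketch (an assumption, not part of the original source): tuning a Spark SQL
// setting through the configuration accessors above, using the standard shuffle-partition key:
//   sqlContext.SetConf("spark.sql.shuffle.partitions", "8");
//   var partitions = sqlContext.GetConf("spark.sql.shuffle.partitions", "200");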
/// <summary>
@ -28,6 +93,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
public DataFrameReader Read()
{
logger.LogInfo("Using DataFrameReader to read input data from external data source");
return new DataFrameReader(sqlContextProxy.Read(), sparkContext);
}
@ -40,9 +106,16 @@ namespace Microsoft.Spark.CSharp.Sql
/// <returns></returns>
public DataFrame ReadDataFrame(string path, StructType schema, Dictionary<string, string> options)
{
logger.LogInfo("Reading DataFrame from file {0}", path);
return new DataFrame(sqlContextProxy.ReadDataFrame(path, schema, options), sparkContext);
}
/// <summary>
/// Creates a <see cref="DataFrame"/> from a RDD containing array of object using the given schema.
/// </summary>
/// <param name="rdd">RDD containing array of object. The array acts as a row and items within the array act as columns which the schema is specified in <paramref name="schema"/>. </param>
/// <param name="schema">The schema of DataFrame.</param>
/// <returns></returns>
public DataFrame CreateDataFrame(RDD<object[]> rdd, StructType schema)
{
// Note: This is for pickling RDD, convert to RDD<byte[]> which happens in CSharpWorker.
@ -55,6 +128,100 @@ namespace Microsoft.Spark.CSharp.Sql
return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext);
}
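// Illustrative usage sketch (an assumption, not part of the original source): building a schema with
// the StructType/StructField types introduced elsewhere in this change and creating a DataFrame from a
// hypothetical RDD<object[]> named 'rowsRdd':
//   var schema = new StructType(new[]
//   {
//       new StructField("name", new StringType()),
//       new StructField("age", new IntegerType())
//   });
//   var people = sqlContext.CreateDataFrame(rowsRdd, schema);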
/// <summary>
/// Registers the given <see cref="DataFrame"/> as a temporary table in the catalog.
/// Temporary tables exist only during the lifetime of this instance of SqlContext.
/// </summary>
/// <param name="dataFrame"></param>
/// <param name="tableName"></param>
public void RegisterDataFrameAsTable(DataFrame dataFrame, string tableName)
{
sqlContextProxy.RegisterDataFrameAsTable(dataFrame.DataFrameProxy, tableName);
}
/// <summary>
/// Remove the temp table from catalog.
/// </summary>
/// <param name="tableName"></param>
public void DropTempTable(string tableName)
{
sqlContextProxy.DropTempTable(tableName);
}
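// Illustrative usage sketch (an assumption, not part of the original source): registering a DataFrame
// as a temp table, querying it with Sql(), and dropping it. Assumes an existing DataFrame 'people':
//   sqlContext.RegisterDataFrameAsTable(people, "people");
//   var adults = sqlContext.Sql("SELECT name FROM people WHERE age >= 18");
//   sqlContext.DropTempTable("people");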
/// <summary>
/// Returns the specified table as a <see cref="DataFrame"/>
/// </summary>
/// <param name="tableName"></param>
/// <returns></returns>
public DataFrame Table(string tableName)
{
return new DataFrame(sqlContextProxy.Table(tableName), sparkContext);
}
/// <summary>
/// Returns a <see cref="DataFrame"/> containing names of tables in the given database.
/// If <paramref name="databaseName"/> is not specified, the current database will be used.
/// The returned DataFrame has two columns: 'tableName' and 'isTemporary' (a column with bool
/// type indicating if a table is a temporary one or not).
/// </summary>
/// <param name="databaseName">Name of the database to use. Default to the current database.
/// Note: This is only applicable to HiveContext.</param>
/// <returns></returns>
public DataFrame Tables(string databaseName = null)
{
return databaseName == null ?
new DataFrame(sqlContextProxy.Tables(), sparkContext) :
new DataFrame(sqlContextProxy.Tables(databaseName), sparkContext);
}
/// <summary>
/// Returns a list of names of tables in the database <paramref name="databaseName"/>
/// </summary>
/// <param name="databaseName">Name of the database to use. Default to the current database.
/// Note: This is only applicable to HiveContext.</param>
/// <returns></returns>
public IEnumerable<string> TableNames(string databaseName = null)
{
return databaseName == null ?
sqlContextProxy.TableNames() : sqlContextProxy.TableNames(databaseName);
}
/// <summary>
/// Caches the specified table in-memory.
/// </summary>
/// <param name="tableName"></param>
public void CacheTable(string tableName)
{
sqlContextProxy.CacheTable(tableName);
}
/// <summary>
/// Removes the specified table from the in-memory cache.
/// </summary>
/// <param name="tableName"></param>
public void UncacheTable(string tableName)
{
sqlContextProxy.UncacheTable(tableName);
}
/// <summary>
/// Removes all cached tables from the in-memory cache.
/// </summary>
public void ClearCache()
{
sqlContextProxy.ClearCache();
}
/// <summary>
/// Returns true if the table is currently cached in-memory.
/// </summary>
/// <param name="tableName"></param>
/// <returns></returns>
public bool IsCached(string tableName)
{
return sqlContextProxy.IsCached(tableName);
}
/// <summary>
/// Executes a SQL query using Spark, returning the result as a DataFrame. The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'
/// </summary>
@ -62,6 +229,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <returns></returns>
public DataFrame Sql(string sqlQuery)
{
logger.LogInfo("SQL query to execute on the dataframe is {0}", sqlQuery);
return new DataFrame(sqlContextProxy.Sql(sqlQuery), sparkContext);
}
@ -117,7 +285,7 @@ namespace Microsoft.Spark.CSharp.Sql
#region UDF Registration
/// <summary>
/// Register UDF with no input argument, e.g.:
/// SqlContext.RegisterFunction&lt;bool>("MyFilter", () => true);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter()");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -131,7 +299,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 1 input argument, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string>("MyFilter", (arg1) => arg1 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -146,7 +314,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 2 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string>("MyFilter", (arg1, arg2) => arg1 != null &amp;&amp; arg2 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -162,7 +330,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 3 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, string>("MyFilter", (arg1, arg2, arg3) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; arg3 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, columnName3)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -179,7 +347,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 4 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg4) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; ... &amp;&amp; arg4 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName4)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -197,7 +365,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 5 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg5) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; ... &amp;&amp; arg5 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName5)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -216,7 +384,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 6 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg6) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; ... &amp;&amp; arg6 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName6)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -236,7 +404,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 7 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg7) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; ... &amp;&amp; arg7 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName7)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -257,7 +425,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 8 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg8) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; ... &amp;&amp; arg8 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName8)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -279,7 +447,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 9 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg9) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; ... &amp;&amp; arg9 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName9)");
/// </summary>
/// <typeparam name="RT"></typeparam>
@ -302,7 +470,7 @@ namespace Microsoft.Spark.CSharp.Sql
/// <summary>
/// Register UDF with 10 input arguments, e.g.:
/// SqlContext.RegisterFunction&lt;bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg10) => arg1 != null &amp;&amp; arg2 != null &amp;&amp; ... &amp;&amp; arg10 != null);
/// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName10)");
/// </summary>
/// <typeparam name="RT"></typeparam>

View file

@ -14,6 +14,9 @@ using Newtonsoft.Json.Linq;
namespace Microsoft.Spark.CSharp.Sql
{
/// <summary>
/// The base type of all Spark SQL data types.
/// </summary>
[Serializable]
public abstract class DataType
{
@ -38,6 +41,9 @@ namespace Microsoft.Spark.CSharp.Sql
/// </summary>
internal virtual object JsonValue { get { return TypeName; } }
/// <summary>
/// The compact JSON representation of this data type.
/// </summary>
public string Json
{
get
@ -47,11 +53,23 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Parses a Json string to construct a DataType.
/// </summary>
/// <param name="json">The Json string to be parsed</param>
/// <returns>The new DataType instance from the Json string</returns>
public static DataType ParseDataTypeFromJson(string json)
{
return ParseDataTypeFromJson(JToken.Parse(json));
}
/// <summary>
/// Parse a JToken object to construct a DataType.
/// </summary>
/// <param name="json">The JToken object to be parsed</param>
/// <returns>The new DataType instance from the Json string</returns>
/// <exception cref="NotImplementedException">Not implemented for "udt" type</exception>
/// <exception cref="ArgumentException"></exception>
protected static DataType ParseDataTypeFromJson(JToken json)
{
if (json.Type == JTokenType.Object) // {name: address, type: {type: struct,...},...}
@ -112,63 +130,125 @@ namespace Microsoft.Spark.CSharp.Sql
}
/// <summary>
/// An internal type used to represent a simple type.
/// </summary>
[Serializable]
public class AtomicType : DataType
{
}
/// <summary>
/// An internal type used to represent a complex type (such as arrays, structs, and maps).
/// </summary>
[Serializable]
public abstract class ComplexType : DataType
{
/// <summary>
/// Abstract method that constructs a complex type from a Json object
/// </summary>
/// <param name="json">The Json object to construct a complex type</param>
/// <returns>A new constructed complex type</returns>
public abstract DataType FromJson(JObject json);
/// <summary>
/// Constructs a complex type from a Json string
/// </summary>
/// <param name="json">The string that represents a Json.</param>
/// <returns>A new constructed complex type</returns>
public DataType FromJson(string json)
{
return FromJson(JObject.Parse(json));
}
}
/// <summary>
/// The data type representing NULL values.
/// </summary>
[Serializable]
public class NullType : AtomicType { }
/// <summary>
/// The data type representing String values.
/// </summary>
[Serializable]
public class StringType : AtomicType { }
/// <summary>
/// The data type representing binary values.
/// </summary>
[Serializable]
public class BinaryType : AtomicType { }
/// <summary>
/// The data type representing Boolean values.
/// </summary>
[Serializable]
public class BooleanType : AtomicType { }
/// <summary>
/// The data type representing Date values.
/// </summary>
[Serializable]
public class DateType : AtomicType { }
/// <summary>
/// The data type representing Timestamp values.
/// </summary>
[Serializable]
public class TimestampType : AtomicType { }
/// <summary>
/// The data type representing Double values.
/// </summary>
[Serializable]
public class DoubleType : AtomicType { }
/// <summary>
/// The data type representing Float values.
/// </summary>
[Serializable]
public class FloatType : AtomicType { }
/// <summary>
/// The data type representing Byte values.
/// </summary>
[Serializable]
public class ByteType : AtomicType { }
/// <summary>
/// The data type representing Int values.
/// </summary>
[Serializable]
public class IntegerType : AtomicType { }
/// <summary>
/// The data type representing Long values.
/// </summary>
[Serializable]
public class LongType : AtomicType { }
/// <summary>
/// The data type representing Short values.
/// </summary>
[Serializable]
public class ShortType : AtomicType { }
/// <summary>
/// The data type representing Decimal values.
/// </summary>
[Serializable]
public class DecimalType : AtomicType
{
/// <summary>
/// Gets the regular expression that represents a fixed decimal.
/// </summary>
public static Regex FixedDecimal = new Regex(@"decimal\((\d+),\s(\d+)\)");
private int? precision, scale;
/// <summary>
/// Initializes a new instance of DecimalType from parameters specifying its precision and scale.
/// </summary>
/// <param name="precision">The precision of the type</param>
/// <param name="scale">The scale of the type</param>
public DecimalType(int? precision = null, int? scale = null)
{
this.precision = precision;
@ -180,18 +260,38 @@ namespace Microsoft.Spark.CSharp.Sql
get { throw new NotImplementedException(); }
}
/// <summary>
/// Constructs a DecimalType from a Json object
/// </summary>
/// <param name="json">The Json object used to construct a DecimalType</param>
/// <returns>A new DecimalType instance</returns>
/// <exception cref="NotImplementedException">Not implemented yet.</exception>
public DataType FromJson(JObject json)
{
throw new NotImplementedException();
}
}
/// <summary>
/// The data type for collections of multiple values.
/// </summary>
[Serializable]
public class ArrayType : ComplexType
{
/// <summary>
/// Gets the DataType of each element in the array
/// </summary>
public DataType ElementType { get { return elementType; } }
/// <summary>
/// Returns whether the array can contain null (None) values
/// </summary>
public bool ContainsNull { get { return containsNull; } }
/// <summary>
/// Initializes an ArrayType instance with a specific element DataType, specifying whether the array can contain null values.
/// </summary>
/// <param name="elementType">The data type of values</param>
/// <param name="containsNull">Indicates if values have null values</param>
public ArrayType(DataType elementType, bool containsNull = true)
{
this.elementType = elementType;
@ -203,6 +303,9 @@ namespace Microsoft.Spark.CSharp.Sql
FromJson(json);
}
/// <summary>
/// Readable string representation for the type.
/// </summary>
public override string SimpleString
{
get { return string.Format("array<{0}>", elementType.SimpleString); }
@ -219,6 +322,11 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Constructs an ArrayType from a Json object
/// </summary>
/// <param name="json">The Json object used to construct an ArrayType</param>
/// <returns>A new ArrayType instance</returns>
public override sealed DataType FromJson(JObject json)
{
elementType = ParseDataTypeFromJson(json["elementType"]);
@ -230,6 +338,9 @@ namespace Microsoft.Spark.CSharp.Sql
private bool containsNull;
}
/// <summary>
/// The data type for Maps. Not implemented yet.
/// </summary>
[Serializable]
public class MapType : ComplexType
{
@ -238,20 +349,48 @@ namespace Microsoft.Spark.CSharp.Sql
get { throw new NotImplementedException(); }
}
/// <summary>
/// Constructs a MapType from a Json object. Not implemented yet.
/// </summary>
/// <param name="json">The Json object used to construct a MapType</param>
/// <returns>A new MapType instance</returns>
/// <exception cref="NotImplementedException"></exception>
public override DataType FromJson(JObject json)
{
throw new NotImplementedException();
}
}
/// <summary>
/// A field inside a StructType.
/// </summary>
[Serializable]
public class StructField : ComplexType
{
/// <summary>
/// The name of this field.
/// </summary>
public string Name { get { return name; } }
/// <summary>
/// The data type of this field.
/// </summary>
public DataType DataType { get { return dataType; } }
/// <summary>
/// Indicates if values of this field can be null values.
/// </summary>
public bool IsNullable { get { return isNullable; } }
/// <summary>
/// The metadata of this field. The metadata should be preserved during transformation if the content of the column is not modified, e.g., in selection.
/// </summary>
public JObject Metadata { get { return metadata; } }
/// <summary>
/// Initializes a StructField instance with a specific name, data type, nullable, and metadata
/// </summary>
/// <param name="name">The name of this field</param>
/// <param name="dataType">The data type of this field</param>
/// <param name="isNullable">Indicates if values of this field can be null values</param>
/// <param name="metadata">The metadata of this field</param>
public StructField(string name, DataType dataType, bool isNullable = true, JObject metadata = null)
{
this.name = name;
@ -265,6 +404,9 @@ namespace Microsoft.Spark.CSharp.Sql
FromJson(json);
}
/// <summary>
/// Returns a readable string that represents the type.
/// </summary>
public override string SimpleString { get { return string.Format(@"{0}:{1}", name, dataType.SimpleString); } }
internal override object JsonValue
@ -279,6 +421,11 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Constructs a StructField from a Json object
/// </summary>
/// <param name="json">The Json object used to construct a StructField</param>
/// <returns>A new StructField instance</returns>
public override sealed DataType FromJson(JObject json)
{
name = json["name"].ToString();
@ -295,9 +442,16 @@ namespace Microsoft.Spark.CSharp.Sql
private JObject metadata;
}
/// <summary>
/// Struct type, consisting of a list of StructFields.
/// This is the data type representing a Row.
/// </summary>
[Serializable]
public class StructType : ComplexType
{
/// <summary>
/// Gets a list of StructField.
/// </summary>
public List<StructField> Fields { get { return fields; } }
internal IStructTypeProxy StructTypeProxy
@ -311,6 +465,10 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Initializes a StructType instance with a specific collection of StructField objects.
/// </summary>
/// <param name="fields">The collection that holds StructField objects</param>
public StructType(IEnumerable<StructField> fields)
{
this.fields = fields.ToList();
@ -328,6 +486,9 @@ namespace Microsoft.Spark.CSharp.Sql
FromJson(jsonSchema);
}
/// <summary>
/// Returns a readable string that joins all <see cref="StructField"/>s together.
/// </summary>
public override string SimpleString
{
get { return string.Format(@"struct<{0}>", string.Join(",", fields.Select(f => f.SimpleString))); }
@ -343,6 +504,11 @@ namespace Microsoft.Spark.CSharp.Sql
}
}
/// <summary>
/// Constructs a StructType from a Json object
/// </summary>
/// <param name="json">The Json object used to construct a StructType</param>
/// <returns>A new StructType instance</returns>
public override sealed DataType FromJson(JObject json)
{
var fieldsJObjects = json["fields"].Select(f => (JObject)f);

View file

@ -0,0 +1,31 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop.Ipc;
using Microsoft.Spark.CSharp.Proxy.Ipc;
namespace Microsoft.Spark.CSharp.Streaming
{
/// <summary>
/// An input stream that always returns the same RDD on each timestep. Useful for testing.
/// </summary>
public class ConstantInputDStream<T> : DStream<T>
{
/// <summary>
/// Construct a ConstantInputDStream instance.
/// </summary>
public ConstantInputDStream(RDD<T> rdd, StreamingContext ssc)
{
if (rdd == null)
{
throw new ArgumentNullException("rdd", "rdd cannot be null, as it would cause a NullReferenceException in subsequent transformations");
}
dstreamProxy = ssc.streamingContextProxy.CreateConstantInputDStream(rdd.RddProxy);
streamingContext = ssc;
serializedMode = SerializedMode.Byte;
}
}
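// Illustrative usage sketch (an assumption, not part of the original source): replaying the same RDD on
// every batch interval, which is handy for testing DStream transformations. Assumes an existing
// RDD<string> 'testRdd' and StreamingContext 'ssc':
//   var constantStream = new ConstantInputDStream<string>(testRdd, ssc);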
}

View file

@ -221,12 +221,12 @@ namespace Microsoft.Spark.CSharp.Streaming
/// <summary>
/// Enable periodic checkpointing of RDDs of this DStream
/// </summary>
/// <param name="intervalMs">time in seconds, after each period of that, generated RDD will be checkpointed</param>
/// <param name="intervalSeconds">time in seconds, after each period of that, generated RDD will be checkpointed</param>
/// <returns></returns>
public DStream<T> Checkpoint(int intervalSeconds)
{
isCheckpointed = true;
DStreamProxy.Checkpoint(intervalSeconds);
return this;
}
@ -373,7 +373,7 @@ namespace Microsoft.Spark.CSharp.Streaming
return DStreamProxy.Slice(fromUnixTime, toUnixTime).Select(r => new RDD<T>(r, streamingContext.SparkContext, serializedMode)).ToArray();
}
internal void ValidateWindowParam(int windowSeconds, int slideSeconds)
{
int duration = SlideDuration;
@ -403,7 +403,7 @@ namespace Microsoft.Spark.CSharp.Streaming
/// <returns></returns>
public DStream<T> Window(int windowSeconds, int slideSeconds)
{
ValidateWindowParam(windowSeconds, slideSeconds);
return new DStream<T>(DStreamProxy.Window(windowSeconds, slideSeconds), streamingContext, serializedMode);
}
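// Illustrative usage sketch (an assumption, not part of the original source): a 30-second window sliding
// every 10 seconds over an existing DStream 'stream', with periodic checkpointing of the windowed stream:
//   var windowed = stream.Window(30, 10);
//   windowed.Checkpoint(60);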

View file

@ -0,0 +1,43 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Collections.Generic;
using Microsoft.Spark.CSharp.Core;
namespace Microsoft.Spark.CSharp.Streaming
{
/// <summary>
/// Utility for creating streams from Microsoft Azure EventHubs.
/// </summary>
public class EventHubsUtils
{
/// <summary>
/// Create a unioned EventHubs stream that receives data from Microsoft Azure Eventhubs
/// The unioned stream will receive message from all partitions of the EventHubs
/// </summary>
/// <param name="ssc">Streaming context</param>
/// <param name="eventhubsParams"> Parameters for EventHubs.
/// Required parameters are:
/// "eventhubs.policyname": EventHubs policy name
/// "eventhubs.policykey": EventHubs policy key
/// "eventhubs.namespace": EventHubs namespace
/// "eventhubs.name": EventHubs name
/// "eventhubs.partition.count": Number of partitions
/// "eventhubs.checkpoint.dir": checkpoint directory on HDFS
///
/// Optional parameters are:
/// "eventhubs.consumergroup": EventHubs consumer group name, default to "\$default"
/// "eventhubs.filter.offset": Starting offset of EventHubs, default to "-1"
/// "eventhubs.filter.enqueuetime": Unix time, millisecond since epoch, default to "0"
/// "eventhubs.default.credits": default AMQP credits, default to -1 (which is 1024)
/// "eventhubs.checkpoint.interval": checkpoint interval in second, default to 10
/// </param>
/// <param name="storageLevelType">Storage level, by default it is MEMORY_ONLY</param>
/// <returns>DStream with byte[] representing events from EventHub</returns>
public static DStream<byte[]> CreateUnionStream(StreamingContext ssc, Dictionary<string, string> eventhubsParams, StorageLevelType storageLevelType = StorageLevelType.MEMORY_ONLY)
{
return new DStream<byte[]>(ssc.streamingContextProxy.EventHubsUnionStream(eventhubsParams, storageLevelType), ssc, SerializedMode.None);
}
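// Illustrative usage sketch (an assumption, not part of the original source): wiring up the required
// EventHubs parameters documented above with placeholder values and creating the unioned stream from an
// existing StreamingContext 'ssc':
//   var eventhubsParams = new Dictionary<string, string>
//   {
//       { "eventhubs.policyname", "<policy-name>" },
//       { "eventhubs.policykey", "<policy-key>" },
//       { "eventhubs.namespace", "<namespace>" },
//       { "eventhubs.name", "<eventhub-name>" },
//       { "eventhubs.partition.count", "4" },
//       { "eventhubs.checkpoint.dir", "/eventhubs/checkpoint" }
//   };
//   var eventHubStream = EventHubsUtils.CreateUnionStream(ssc, eventhubsParams);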
}
}

View file

@ -6,11 +6,16 @@ using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Runtime.Serialization.Formatters.Binary;
using System.IO;
using Microsoft.Spark.CSharp.Core;
namespace Microsoft.Spark.CSharp.Streaming
{
/// <summary>
/// Utils for Kafka input stream.
/// </summary>
public class KafkaUtils
{
/// <summary>
@ -30,6 +35,7 @@ namespace Microsoft.Spark.CSharp.Streaming
/// <summary>
/// Create an input stream that pulls messages from a Kafka Broker.
/// </summary>
/// <param name="ssc">Spark Streaming Context</param>
/// <param name="zkQuorum">Zookeeper quorum (hostname:port,hostname:port,..).</param>
/// <param name="groupId">The group id for this consumer.</param>
/// <param name="topics">Dict of (topic_name -> numPartitions) to consume. Each partition is consumed in its own thread.</param>
@ -79,5 +85,90 @@ namespace Microsoft.Spark.CSharp.Streaming
{
return new DStream<KeyValuePair<byte[], byte[]>>(ssc.streamingContextProxy.DirectKafkaStream(topics, kafkaParams, fromOffsets), ssc, SerializedMode.Pair);
}
/// <summary>
/// Create an input stream that directly pulls messages from Kafka brokers, starting at the specified offsets.
///
/// This is not a receiver-based Kafka input stream; it directly pulls messages from Kafka
/// in each batch duration and processes them without storing them.
///
/// This does not use Zookeeper to store offsets. The consumed offsets are tracked
/// by the stream itself. For interoperability with Kafka monitoring tools that depend on
/// Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application.
/// You can access the offsets used in each batch from the generated RDDs (see
/// [[org.apache.spark.streaming.kafka.HasOffsetRanges]]).
/// To recover from driver failures, you have to enable checkpointing in the StreamingContext.
/// The information on consumed offset can be recovered from the checkpoint.
/// See the programming guide for details (constraints, etc.).
///
/// </summary>
/// <param name="ssc">Spark Streaming Context</param>
/// <param name="topics">list of topic_name to consume.</param>
/// <param name="kafkaParams">
/// Additional params for Kafka. Requires "metadata.broker.list" or "bootstrap.servers" to be set
/// with Kafka broker(s) (NOT zookeeper servers), specified in host1:port1,host2:port2 form.
/// </param>
/// <param name="fromOffsets">Per-topic/partition Kafka offsets defining the (inclusive) starting point of the stream.</param>
/// <param name="numPartitions">
/// User hint on how many Kafka RDD partitions to create instead of aligning with the Kafka partitions;
/// unbalanced Kafka partitions and/or under-distributed data will be redistributed evenly across
/// a (probably larger) number of RDD partitions.
/// If numPartitions = -1, either repartition based on spark.streaming.kafka.maxRatePerTask or do nothing if config not defined
/// If numPartitions = 0, repartition using original kafka partition count
/// If numPartitions > 0, repartition using this parameter
/// </param>
/// <returns>A DStream object</returns>
public static DStream<KeyValuePair<byte[], byte[]>> CreateDirectStreamWithRepartition(StreamingContext ssc, List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets, int numPartitions = -1)
{
return new DStream<KeyValuePair<byte[], byte[]>>(ssc.streamingContextProxy.DirectKafkaStreamWithRepartition(topics, kafkaParams, fromOffsets, numPartitions, null, null), ssc, SerializedMode.Pair);
}
/// <summary>
/// Create an input stream that directly pulls messages from a Kafka Broker and specific offset.
///
/// This is not a receiver-based Kafka input stream; it directly pulls messages from Kafka
/// in each batch duration and processes them without storing them.
///
/// This does not use Zookeeper to store offsets. The consumed offsets are tracked
/// by the stream itself. For interoperability with Kafka monitoring tools that depend on
/// Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application.
/// You can access the offsets used in each batch from the generated RDDs (see
/// [[org.apache.spark.streaming.kafka.HasOffsetRanges]]).
/// To recover from driver failures, you have to enable checkpointing in the StreamingContext.
/// The information on consumed offset can be recovered from the checkpoint.
/// See the programming guide for details (constraints, etc.).
///
/// </summary>
/// <param name="ssc">Spark Streaming Context</param>
/// <param name="topics">list of topic_name to consume.</param>
/// <param name="kafkaParams">
/// Additional params for Kafka. Requires "metadata.broker.list" or "bootstrap.servers" to be set
/// with Kafka broker(s) (NOT zookeeper servers), specified in host1:port1,host2:port2 form.
/// </param>
/// <param name="fromOffsets">Per-topic/partition Kafka offsets defining the (inclusive) starting point of the stream.</param>
/// <param name="numPartitions">
/// User hint on how many Kafka RDD partitions to create instead of aligning with the Kafka partitions;
/// unbalanced Kafka partitions and/or under-distributed data will be redistributed evenly across
/// a (probably larger) number of RDD partitions.
/// If numPartitions = -1, either repartition based on spark.streaming.kafka.maxRatePerTask or do nothing if config not defined
/// If numPartitions = 0, repartition using original kafka partition count
/// If numPartitions > 0, repartition using this parameter
/// </param>
/// <param name="readFunc">user function to process the kafka data.</param>
/// <returns>A DStream object</returns>
public static DStream<T> CreateDirectStreamWithRepartitionAndReadFunc<T>(StreamingContext ssc, List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets,
int numPartitions, Func<int, IEnumerable<KeyValuePair<byte[], byte[]>>, IEnumerable<T>> readFunc)
{
var mapPartitionsWithIndexHelper = new MapPartitionsWithIndexHelper<KeyValuePair<byte[], byte[]>, T>(readFunc, true);
var transformHelper = new TransformHelper<KeyValuePair<byte[], byte[]>, T>(mapPartitionsWithIndexHelper.Execute);
var transformDynamicHelper = new TransformDynamicHelper<KeyValuePair<byte[], byte[]>, T>(transformHelper.Execute);
Func<double, RDD<dynamic>, RDD<dynamic>> func = transformDynamicHelper.Execute;
var formatter = new BinaryFormatter();
var stream = new MemoryStream();
formatter.Serialize(stream, func);
byte[] readFuncBytes = stream.ToArray();
string serializationMode = SerializedMode.Pair.ToString();
return new DStream<T>(ssc.streamingContextProxy.DirectKafkaStreamWithRepartition(topics, kafkaParams, fromOffsets, numPartitions, readFuncBytes, serializationMode), ssc);
}
}
}
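
A hedged sketch of the repartitioning direct-stream overload above; the broker list, topic and partition count are placeholders, `ssc` is assumed to be an existing StreamingContext, and `System.Text` is assumed for `Encoding`:

``` c#
var topics = new List<string> { "logs" };                      // placeholder topic
var kafkaParams = new Dictionary<string, string>
{
    { "metadata.broker.list", "broker1:9092,broker2:9092" }    // placeholder brokers
};
var fromOffsets = new Dictionary<string, long>();              // empty: start from default offsets

// Hint Mobius to spread each batch over 32 RDD partitions instead of
// mirroring the Kafka partition count.
var stream = KafkaUtils.CreateDirectStreamWithRepartition(
    ssc, topics, kafkaParams, fromOffsets, 32);

stream.Map(kvp => Encoding.UTF8.GetString(kvp.Value))
      .ForeachRDD(rdd =>
      {
          foreach (var line in rdd.Collect())
              Console.WriteLine(line);
      });
```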

View file

@ -0,0 +1,433 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop;
using Microsoft.Spark.CSharp.Interop.Ipc;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Proxy.Ipc;
using Microsoft.Spark.CSharp.Services;
namespace Microsoft.Spark.CSharp.Streaming
{
/// <summary>
/// DStream representing the stream of data generated by `mapWithState` operation on a pair DStream.
/// Additionally, it also gives access to the stream of state snapshots, that is, the state data of all keys after a batch has updated them.
/// </summary>
/// <typeparam name="K">Type of the key</typeparam>
/// <typeparam name="V">Type of the value</typeparam>
/// <typeparam name="S">Type of the state data</typeparam>
/// <typeparam name="M">Type of the mapped data</typeparam>
[Serializable]
public class MapWithStateDStream<K, V, S, M> : DStream<M>
{
internal DStream<KeyValuePair<K, S>> snapshotsDStream;
internal MapWithStateDStream(DStream<M> mappedDataDStream, DStream<KeyValuePair<K, S>> snapshotsDStream)
: base(mappedDataDStream.DStreamProxy, mappedDataDStream.streamingContext)
{
this.snapshotsDStream = snapshotsDStream;
}
/// <summary>
/// Return a pair DStream where each RDD is the snapshot of the state of all the keys.
/// </summary>
public DStream<KeyValuePair<K, S>> StateSnapshots()
{
return snapshotsDStream;
}
}
/// <summary>
/// Class to hold a state instance and the timestamp when the state is updated or created.
/// No need to explicitly make this class cloneable, since serialization and deserialization in the Worker already act as a kind of clone mechanism.
/// </summary>
/// <typeparam name="S">Type of the state data</typeparam>
[Serializable]
internal class KeyedState<S>
{
internal S state;
internal long ticks;
internal KeyedState()
{
}
internal KeyedState(S state, long ticks)
{
this.state = state;
this.ticks = ticks;
}
}
/// <summary>
/// Record storing the keyed state for MapWithStateRDD.
/// Each record contains a stateMap and a sequence of records returned by the mapping function of MapWithState.
/// Note: no need to explicitly make this class cloneable, since serialization and deserialization in the Worker already act as a kind of clone.
/// </summary>
/// <typeparam name="K">Type of the key</typeparam>
/// <typeparam name="S">Type of the state data</typeparam>
/// <typeparam name="M">Type of the mapped data</typeparam>
[Serializable]
internal class MapWithStateRDDRecord<K, S, M>
{
internal Dictionary<K, KeyedState<S>> stateMap = new Dictionary<K, KeyedState<S>>();
internal List<M> mappedData = new List<M>();
public MapWithStateRDDRecord()
{
}
public MapWithStateRDDRecord(long t, IEnumerable<KeyValuePair<K, S>> iter)
{
foreach (var p in iter)
{
stateMap[p.Key] = new KeyedState<S>(p.Value, t);
}
}
}
/// <summary>
/// Helper class to update states for an RDD partition.
/// Reference: https://github.com/apache/spark/blob/master/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
/// </summary>
/// <typeparam name="K">Type of the key</typeparam>
/// <typeparam name="V">Type of the value</typeparam>
/// <typeparam name="S">Type of the state data</typeparam>
/// <typeparam name="M">Type of the mapped data</typeparam>
[Serializable]
internal class UpdateStateHelper<K, V, S, M>
{
[NonSerialized]
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(UpdateStateHelper<K, V, S, M>));
private readonly Func<K, V, State<S>, M> f;
private readonly long ticks;
private readonly bool removeTimedoutData;
private readonly TimeSpan idleDuration;
internal UpdateStateHelper(Func<K, V, State<S>, M> f, long ticks, bool removeTimedoutData, TimeSpan idleDuration)
{
this.f = f;
this.ticks = ticks;
this.removeTimedoutData = removeTimedoutData;
this.idleDuration = idleDuration;
}
internal IEnumerable<dynamic> Execute(int pid, IEnumerable<dynamic> iter)
{
var enumerator = iter.GetEnumerator();
var preStateRddRecord = GetStateRecord(enumerator);
var stateRddRecord = preStateRddRecord;
while (enumerator.MoveNext())
{
KeyValuePair<K, V> kv = enumerator.Current;
KeyedState<S> keyedState;
State<S> wrappedState = stateRddRecord.stateMap.TryGetValue(kv.Key, out keyedState) ? new State<S>(keyedState.state) : new State<S>(default(S));
var mappedData = default(M);
try
{
mappedData = f(kv.Key, kv.Value, wrappedState);
}
catch (Exception e)
{
logger.LogException(e);
}
stateRddRecord.mappedData.Add(mappedData);
if (wrappedState.removed)
{
stateRddRecord.stateMap.Remove(kv.Key);
}
else if (wrappedState.updated || wrappedState.defined)
{
stateRddRecord.stateMap[kv.Key] = new KeyedState<S>(wrappedState.state, ticks);
}
}
// Get the timed out state records, call the mapping function on each and collect the data returned
if (removeTimedoutData)
{
long timeoutThresholdInTicks = ticks - idleDuration.Ticks;
var toBeRemovedKeys = new List<K>();
foreach (KeyValuePair<K, KeyedState<S>> entry in stateRddRecord.stateMap)
{
if (entry.Value.ticks >= timeoutThresholdInTicks) continue;
var timingOutstate = new State<S>(entry.Value.state, true);
var mappedData = default(M);
try
{
mappedData = f(entry.Key, default(V), timingOutstate);
}
catch (Exception e)
{
logger.LogException(e);
}
stateRddRecord.mappedData.Add(mappedData);
toBeRemovedKeys.Add(entry.Key);
}
foreach (var k in toBeRemovedKeys)
{
stateRddRecord.stateMap.Remove(k);
}
}
return new []{stateRddRecord};
}
internal MapWithStateRDDRecord<K, S, M> GetStateRecord(IEnumerator<dynamic> enumerator)
{
if (enumerator.MoveNext())
{
return enumerator.Current;
}
throw new InvalidOperationException("MapWithStateRDDRecord is missing.");
}
}
[Serializable]
internal class MapWithStateHelper<K, V, S, M>
{
private static readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
private readonly Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc;
private readonly StateSpec<K, V, S, M> stateSpec;
internal MapWithStateHelper(Func<double, RDD<dynamic>, RDD<dynamic>> prevF, StateSpec<K, V, S, M> stateSpec)
{
prevFunc = prevF;
this.stateSpec = stateSpec;
}
internal RDD<dynamic> Execute(double t, RDD<dynamic> stateRDD, RDD<dynamic> valuesRDD)
{
long ticks = UnixTimeEpoch.AddMilliseconds(t).Ticks;
if (prevFunc != null)
{
valuesRDD = prevFunc(t, valuesRDD);
}
var values = valuesRDD.ConvertTo<KeyValuePair<K, V>>().PartitionBy(stateSpec.numPartitions);
if (stateRDD == null)
{
if (stateSpec.initialState != null)
{
if (stateSpec.initialState.sparkContext == null)
{
stateSpec.initialState.sparkContext = valuesRDD.sparkContext;
}
var partitionedInitialState = stateSpec.initialState.PartitionBy(stateSpec.numPartitions);
stateRDD = partitionedInitialState.MapPartitions(new MapWithStateMapPartitionHelper<K, V, S, M>(ticks).Execute, true).ConvertTo<dynamic>();
}
else
{
stateRDD = values.PartitionBy(stateSpec.numPartitions).MapPartitions(new MapWithStateMapPartitionHelper<K, V, S, M>(ticks).ExecuteWithoutInitialState, true).ConvertTo<dynamic>();
}
}
bool removeTimedoutData = stateSpec.idleDuration.Ticks != 0 && stateRDD.IsCheckpointed;
stateRDD.partitioner = values.partitioner;
RDD<dynamic> union = stateRDD.Union(values.ConvertTo<dynamic>());
return union.MapPartitionsWithIndex(new UpdateStateHelper<K, V, S, M>(stateSpec.mappingFunction, ticks, removeTimedoutData, stateSpec.idleDuration).Execute, true);
}
}
[Serializable]
internal class MapWithStateMapPartitionHelper<K, V, S, M>
{
internal long ticks;
internal MapWithStateMapPartitionHelper(long ticks)
{
this.ticks = ticks;
}
internal IEnumerable<MapWithStateRDDRecord<K, S, M>> Execute(IEnumerable<KeyValuePair<K, S>> iter)
{
return new[] {new MapWithStateRDDRecord<K, S, M>(ticks, iter)};
}
internal IEnumerable<MapWithStateRDDRecord<K, S, M>> ExecuteWithoutInitialState(IEnumerable<KeyValuePair<K, V>> iter)
{
return new[] { new MapWithStateRDDRecord<K, S, M>() };
}
}
/// <summary>
/// Representing all the specifications of the DStream transformation `mapWithState` operation.
/// </summary>
/// <typeparam name="K">Type of the key</typeparam>
/// <typeparam name="V">Type of the value</typeparam>
/// <typeparam name="S">Type of the state data</typeparam>
/// <typeparam name="M">Type of the mapped data</typeparam>
[Serializable]
public class StateSpec<K, V, S, M>
{
internal Func<K, V, State<S>, M> mappingFunction;
internal int numPartitions;
internal TimeSpan idleDuration = TimeSpan.FromTicks(0);
internal RDD<KeyValuePair<K, S>> initialState = null;
/// <summary>
/// Create a StateSpec for setting all the specifications of the `mapWithState` operation on a pair DStream.
/// </summary>
/// <param name="mappingFunction">The function applied on every data item to manage the associated state and generate the mapped data</param>
public StateSpec(Func<K, V, State<S>, M> mappingFunction)
{
this.mappingFunction = mappingFunction;
}
/// <summary>
/// Set the number of partitions by which the state RDDs generated by `mapWithState` will be partitioned.
/// Hash partitioning will be used.
/// </summary>
/// <param name="numPartitions">The number of partitions</param>
/// <returns>The new StateSpec object</returns>
public StateSpec<K, V, S, M> NumPartitions(int numPartitions)
{
this.numPartitions = numPartitions;
return this;
}
/// <summary>
/// Set the duration after which the state of an idle key will be removed. A key and its state are
/// considered idle if the key has not received any data for at least the given duration. The
/// mapping function will be called one final time on the idle states that are going to be
/// removed, with [[org.apache.spark.streaming.State State.isTimingOut()]] set to `true` in that call.
/// </summary>
/// <param name="idleDuration">The idle time of duration</param>
/// <returns>The new StateSpec object</returns>
public StateSpec<K, V, S, M> Timeout(TimeSpan idleDuration)
{
this.idleDuration = idleDuration;
return this;
}
/// <summary>
/// Set the RDD containing the initial states that will be used by mapWithState
/// </summary>
/// <param name="initialState">The given initial state</param>
/// <returns>The new StateSpec object</returns>
public StateSpec<K, V, S, M> InitialState(RDD<KeyValuePair<K, S>> initialState)
{
this.initialState = initialState;
return this;
}
}
/// <summary>
/// Class for getting and updating the state in the mapping function used in the `mapWithState` operation
/// </summary>
/// <typeparam name="S">Type of the state</typeparam>
[Serializable]
public class State<S>
{
internal S state = default(S);
[NonSerialized]
internal bool defined = false;
[NonSerialized]
internal bool timingOut = false; // FIXME: set timingOut to true for timed-out keys
[NonSerialized]
internal bool updated = false;
[NonSerialized]
internal bool removed = false;
internal State(S state, bool timingOut = false)
{
this.state = state;
this.timingOut = timingOut;
removed = false;
updated = false;
if (!timingOut)
{
defined = !ReferenceEquals(null, state);
}
else
{
defined = true;
}
}
/// <summary>
/// Returns whether the state already exists
/// </summary>
/// <returns>true, if the state already exists; otherwise, false.</returns>
public bool Exists()
{
return defined;
}
/// <summary>
/// Gets the state if it exists, otherwise it will throw ArgumentException.
/// </summary>
/// <returns>The state</returns>
/// <exception cref="ArgumentException">ArgumentException if it does not exist.</exception>
public S Get()
{
if (defined)
{
return state;
}
throw new ArgumentException("State is not set");
}
/// <summary>
/// Updates the state with a new value.
/// </summary>
/// <param name="newState">The new state</param>
/// <exception cref="ArgumentException">ArgumentException if the state already be removed or timing out</exception>
public void Update(S newState)
{
if (removed || timingOut)
{
throw new ArgumentException("Cannot update the state that is timing out or has been removed.");
}
state = newState;
defined = true;
updated = true;
}
/// <summary>
/// Removes the state if it exists.
/// </summary>
/// <exception cref="ArgumentException">ArgumentException if the state already be removed or timing out</exception>
public void Remove()
{
if (removed || timingOut)
{
throw new ArgumentException("Cannot update the state that is timing out or has already been removed.");
}
defined = false;
updated = false;
removed = true;
}
/// <summary>
/// Returns whether the state is timing out and going to be removed by the system after the current batch.
/// </summary>
/// <returns>true, if it is timing out; otherwise, false.</returns>
public bool IsTimingOut()
{
return timingOut;
}
}
}
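
To show how the types above (StateSpec, State and MapWithStateDStream) fit together, here is a hedged sketch of a stateful running word count; `words` is assumed to be an existing `DStream<string>`, and the partition count and timeout are illustrative:

``` c#
var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

var stateSpec = new StateSpec<string, int, int, int>((word, count, state) =>
    {
        var sum = count + (state.Exists() ? state.Get() : 0);
        if (!state.IsTimingOut())        // Update() throws for keys that are timing out
        {
            state.Update(sum);
        }
        return sum;                      // mapped value emitted for this record
    })
    .NumPartitions(4)
    .Timeout(TimeSpan.FromMinutes(30));  // drop keys that are idle for 30 minutes

MapWithStateDStream<string, int, int, int> counts = pairs.MapWithState(stateSpec);

// Per-batch mapped values
counts.ForeachRDD(rdd =>
{
    foreach (var c in rdd.Collect())
        Console.WriteLine(c);
});

// Snapshot of the full state after each batch
counts.StateSnapshots().ForeachRDD(rdd =>
{
    foreach (var kv in rdd.Collect())
        Console.WriteLine("{0} -> {1}", kv.Key, kv.Value);
});
```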

View file

@ -267,17 +267,16 @@ namespace Microsoft.Spark.CSharp.Streaming
int numPartitions = 0,
Func<KeyValuePair<K, V>, bool> filterFunc = null)
{
self.ValidatWindowParam(windowSeconds, slideSeconds);
self.ValidateWindowParam(windowSeconds, slideSeconds);
if (slideSeconds <= 0)
slideSeconds = self.SlideDuration;
// dstream to be transformed by subtracting old RDDs and adding new RDDs based on the window
var reduced = self.ReduceByKey(reduceFunc, numPartitions);
reduced.Cache();
Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc = reduced.Piplinable ? (reduced as TransformedDStream<KeyValuePair<K, V>>).func : null;
var helper = new ReduceByKeyAndWindowHelper<K, V>(reduceFunc, invReduceFunc, numPartitions, filterFunc, prevFunc);
var helper = new ReduceByKeyAndWindowHelper<K, V>(reduceFunc, invReduceFunc, numPartitions, filterFunc);
// function to reduce the new values that entered the window (e.g., adding new counts)
Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> reduceF = helper.Reduce;
@ -292,17 +291,17 @@ namespace Microsoft.Spark.CSharp.Streaming
Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> invReduceF = helper.InvReduce;
invStream = new MemoryStream();
formatter.Serialize(stream, invReduceF);
formatter.Serialize(invStream, invReduceF);
}
return new DStream<KeyValuePair<K, V>>(
SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpReducedWindowedDStream(
reduced.Piplinable ? reduced.prevDStreamProxy : reduced.DStreamProxy,
reduced.DStreamProxy,
stream.ToArray(),
invStream == null ? null : invStream.ToArray(),
windowSeconds,
slideSeconds,
(reduced.Piplinable ? reduced.prevSerializedMode : reduced.serializedMode).ToString()),
reduced.serializedMode.ToString()),
self.streamingContext
);
}
@ -319,13 +318,14 @@ namespace Microsoft.Spark.CSharp.Streaming
/// State update function - (newValues, oldState) => newState
/// If this function returns None, then corresponding state key-value pair will be eliminated.
/// </param>
/// <param name="initialState">Initial state value of each key</param>
/// <param name="numPartitions"></param>
/// <returns></returns>
public static DStream<KeyValuePair<K, S>> UpdateStateByKey<K, V, S>(this DStream<KeyValuePair<K, V>> self,
Func<IEnumerable<V>, S, S> updateFunc,
Func<IEnumerable<V>, S, S> updateFunc, RDD<KeyValuePair<K, S>> initialState = null,
int numPartitions = 0)
{
return UpdateStateByKey<K, V, S>(self, new UpdateStateByKeyHelper<K, V, S>(updateFunc).Execute, numPartitions);
return UpdateStateByKey<K, V, S>(self, new UpdateStateByKeyHelper<K, V, S>(updateFunc).Execute, initialState, numPartitions);
}
/// <summary>
@ -337,13 +337,14 @@ namespace Microsoft.Spark.CSharp.Streaming
/// <typeparam name="S"></typeparam>
/// <param name="self"></param>
/// <param name="updateFunc">State update function - IEnumerable[K, [newValues, oldState]] => IEnumerable[K, newState]</param>
/// <param name="initialState">Initial state value of each key</param>
/// <param name="numPartitions"></param>
/// <returns></returns>
public static DStream<KeyValuePair<K, S>> UpdateStateByKey<K, V, S>(this DStream<KeyValuePair<K, V>> self,
Func<IEnumerable<KeyValuePair<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<KeyValuePair<K, S>>> updateFunc,
Func<IEnumerable<KeyValuePair<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<KeyValuePair<K, S>>> updateFunc, RDD<KeyValuePair<K, S>> initialState = null,
int numPartitions = 0)
{
return UpdateStateByKey<K, V, S>(self, new MapPartitionsHelper<KeyValuePair<K, Tuple<IEnumerable<V>, S>>, KeyValuePair<K, S>>(updateFunc).Execute, numPartitions);
return UpdateStateByKey<K, V, S>(self, new MapPartitionsHelper<KeyValuePair<K, Tuple<IEnumerable<V>, S>>, KeyValuePair<K, S>>(updateFunc).Execute, initialState, numPartitions);
}
/// <summary>
@ -355,30 +356,68 @@ namespace Microsoft.Spark.CSharp.Streaming
/// <typeparam name="S"></typeparam>
/// <param name="self"></param>
/// <param name="updateFunc">State update function - (pid, IEnumerable[K, [newValues, oldState]]) => IEnumerable[K, newState]</param>
/// <param name="initialState">Initial state value of each key</param>
/// <param name="numPartitions"></param>
/// <returns></returns>
public static DStream<KeyValuePair<K, S>> UpdateStateByKey<K, V, S>(this DStream<KeyValuePair<K, V>> self,
Func<int, IEnumerable<KeyValuePair<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<KeyValuePair<K, S>>> updateFunc,
int numPartitions = 0)
RDD<KeyValuePair<K, S>> initialState = null, int numPartitions = 0)
{
if (numPartitions <= 0)
numPartitions = self.streamingContext.SparkContext.DefaultParallelism;
Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc = self.Piplinable ? (self as TransformedDStream<KeyValuePair<K, V>>).func : null;
// completes pipelinable dstream by adding the last pipelinable operation
// before transforming to CSharpStateDStream so that UpdateStateByKey's
// parallel job covers all pipelinable operations before shuffling
var ds = self.Transform(new AddShuffleKeyHelper<K, V>(numPartitions).Execute);
Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> func = new UpdateStateByKeysHelper<K, V, S>(updateFunc, prevFunc, numPartitions).Execute;
Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> func = new UpdateStateByKeysHelper<K, V, S>(updateFunc, initialState, numPartitions).Execute;
var formatter = new BinaryFormatter();
var stream = new MemoryStream();
formatter.Serialize(stream, func);
return new DStream<KeyValuePair<K, S>>(SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream(
ds.DStreamProxy,
stream.ToArray(),
"CSharpStateDStream",
ds.serializedMode.ToString(),
ds.serializedMode.ToString()),
self.streamingContext);
}
/// <summary>
/// Return a new "state" DStream where the state for each key is updated by applying
/// the given function on the previous state of the key and the new values of the key.
/// </summary>
public static MapWithStateDStream<K, V, S, M> MapWithState<K, V, S, M>(this DStream<KeyValuePair<K, V>> self, StateSpec<K, V, S, M> stateSpec)
{
if (stateSpec.numPartitions <= 0)
{
stateSpec = stateSpec.NumPartitions(self.streamingContext.SparkContext.DefaultParallelism);
}
Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc = self.Piplinable ? (self as TransformedDStream<KeyValuePair<K, V>>).func : null;
Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> func = new MapWithStateHelper<K, V, S, M>(prevFunc, stateSpec).Execute;
var formatter = new BinaryFormatter();
var stream = new MemoryStream();
formatter.Serialize(stream, func);
var mapWithStateDStream = new DStream<MapWithStateRDDRecord<K, S, M>>(SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream(
self.Piplinable ? self.prevDStreamProxy : self.DStreamProxy,
stream.ToArray(),
"CSharpStateDStream",
self.serializedMode.ToString(),
(self.Piplinable ? self.prevSerializedMode : self.serializedMode).ToString()),
self.streamingContext);
DStream<M> mappedDataDStream = mapWithStateDStream.FlatMap(r => r.mappedData);
DStream<KeyValuePair<K, S>> snapshotsDStream = mapWithStateDStream.FlatMap(
r => r.stateMap.Select(entry => new KeyValuePair<K, S>(entry.Key, entry.Value.state)));
return new MapWithStateDStream<K, V, S, M>(mappedDataDStream, snapshotsDStream);
}
}
@ -425,6 +464,25 @@ namespace Microsoft.Spark.CSharp.Streaming
}
}
[Serializable]
internal class AddShuffleKeyHelper<K, V>
{
private readonly int numPartitions;
internal AddShuffleKeyHelper(int numPartitions)
{
this.numPartitions = numPartitions;
}
internal RDD<byte[]> Execute(RDD<KeyValuePair<K, V>> rdd)
{
var keyed = rdd.MapPartitionsWithIndex(new PairRDDFunctions.AddShuffleKeyHelper<K, V>(numPartitions).Execute, true);
keyed.bypassSerializer = true;
keyed.rddProxy = keyed.RddProxy;
return keyed;
}
}
[Serializable]
internal class MapValuesHelper<K, V, U>
{
@ -509,7 +567,7 @@ namespace Microsoft.Spark.CSharp.Streaming
this.numPartitions = numPartitions;
}
internal RDD<KeyValuePair<K, Tuple<V, Option<W>>>> Execute<K,V,W>(RDD<KeyValuePair<K, V>> l, RDD<KeyValuePair<K, W>> r)
internal RDD<KeyValuePair<K, Tuple<V, Option<W>>>> Execute(RDD<KeyValuePair<K, V>> l, RDD<KeyValuePair<K, W>> r)
{
return l.LeftOuterJoin<K, V, W>(r, numPartitions);
}
@ -552,34 +610,28 @@ namespace Microsoft.Spark.CSharp.Streaming
private readonly Func<V, V, V> invReduceFunc;
private readonly int numPartitions;
private readonly Func<KeyValuePair<K, V>, bool> filterFunc;
private readonly Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc;
internal ReduceByKeyAndWindowHelper(Func<V, V, V> reduceF,
Func<V, V, V> invReduceF,
int numPartitions,
Func<KeyValuePair<K, V>, bool> filterF,
Func<double, RDD<dynamic>, RDD<dynamic>> prevF)
Func<KeyValuePair<K, V>, bool> filterF)
{
reduceFunc = reduceF;
invReduceFunc = invReduceF;
this.numPartitions = numPartitions;
filterFunc = filterF;
prevFunc = prevF;
}
internal RDD<dynamic> Reduce(double t, RDD<dynamic> a, RDD<dynamic> b)
{
if (prevFunc != null)
b = prevFunc(t, b);
var r = b.ConvertTo<KeyValuePair<K, V>>().ReduceByKey<K, V>(reduceFunc);
b.partitioner = new Partitioner(numPartitions, null);
var r = b.ConvertTo<KeyValuePair<K, V>>();
if (a != null)
{
if (prevFunc != null)
a = prevFunc(t, a);
r = a.ConvertTo<KeyValuePair<K, V>>().Union(r).ReduceByKey<K, V>(reduceFunc);
a.partitioner = b.partitioner;
r = a.ConvertTo<KeyValuePair<K, V>>().Union(r);
}
r = r.ReduceByKey<K, V>(reduceFunc, numPartitions);
if (filterFunc != null)
r.Filter(filterFunc);
return r.ConvertTo<dynamic>();
@ -587,13 +639,8 @@ namespace Microsoft.Spark.CSharp.Streaming
internal RDD<dynamic> InvReduce(double t, RDD<dynamic> a, RDD<dynamic> b)
{
if (prevFunc != null)
{
a = prevFunc(t, a);
b = prevFunc(t, b);
}
var rddb = b.ConvertTo<KeyValuePair<K, V>>().ReduceByKey<K, V>(reduceFunc);
a.partitioner = b.partitioner = new Partitioner(numPartitions, null);
var rddb = b.ConvertTo<KeyValuePair<K, V>>().ReduceByKey<K, V>(reduceFunc, numPartitions);
var rdda = a.ConvertTo<KeyValuePair<K, V>>();
var joined = rdda.Join<K, V, V>(rddb, numPartitions);
var r = joined.MapValues<K, Tuple<V, V>, V>(kv => kv.Item2 != null ? invReduceFunc(kv.Item1, kv.Item2) : kv.Item1);
@ -621,14 +668,14 @@ namespace Microsoft.Spark.CSharp.Streaming
internal class UpdateStateByKeysHelper<K, V, S>
{
private readonly Func<int, IEnumerable<KeyValuePair<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<KeyValuePair<K, S>>> func;
private readonly Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc;
private readonly RDD<KeyValuePair<K, S>> initialState;
private readonly int numPartitions;
internal UpdateStateByKeysHelper(
Func<int, IEnumerable<KeyValuePair<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<KeyValuePair<K, S>>> f,
Func<double, RDD<dynamic>, RDD<dynamic>> prevF, int numPartitions)
RDD<KeyValuePair<K, S>> initialState, int numPartitions)
{
func = f;
prevFunc = prevF;
this.initialState = initialState;
this.numPartitions = numPartitions;
}
@ -637,10 +684,21 @@ namespace Microsoft.Spark.CSharp.Streaming
RDD<KeyValuePair<K, S>> state = null;
RDD<KeyValuePair<K, Tuple<IEnumerable<V>, S>>> g = null;
if (prevFunc != null)
valuesRDD = prevFunc(t, valuesRDD);
// call into scala side partitionBy directly since AddShuffleKey already applied
var values = new RDD<KeyValuePair<K, V>>(valuesRDD.sparkContext.SparkContextProxy.CreatePairwiseRDD(valuesRDD.rddProxy, numPartitions, 0), valuesRDD.sparkContext);
values.partitioner = new Partitioner(numPartitions, null);
var values = valuesRDD.ConvertTo<KeyValuePair<K, V>>();
if (stateRDD == null)
{
if (initialState != null)
{
if (initialState.sparkContext == null)
{
initialState.sparkContext = valuesRDD.sparkContext;
}
stateRDD = initialState.ConvertTo<dynamic>();
}
}
if (stateRDD == null)
{
@ -649,7 +707,6 @@ namespace Microsoft.Spark.CSharp.Streaming
else
{
state = stateRDD.ConvertTo<KeyValuePair<K, S>>();
values = values.PartitionBy(numPartitions);
state.partitioner = values.partitioner;
g = state.GroupWith(values, numPartitions).MapValues(x => new Tuple<IEnumerable<V>, S>(new List<V>(x.Item2), x.Item1.Count > 0 ? x.Item1[0] : default(S)));
}
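
A hedged sketch of the new initial-state overload of `UpdateStateByKey` above; `pairs` is assumed to be an existing `DStream<KeyValuePair<string, int>>`, the seed values are illustrative, and `System.Linq` is assumed for `Count()`:

``` c#
// Seed the state with counts carried over from a previous run (placeholder values).
var initialState = ssc.SparkContext.Parallelize(new[]
{
    new KeyValuePair<string, int>("the", 100),
    new KeyValuePair<string, int>("spark", 42)
});

var runningCounts = pairs.UpdateStateByKey<string, int, int>(
    (newValues, oldCount) => oldCount + newValues.Count(),   // fold this batch's values into the state
    initialState,
    4);                                                       // numPartitions
```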

View file

@ -52,10 +52,15 @@ namespace Microsoft.Spark.CSharp.Streaming
this.streamingContextProxy = streamingContextProxy;
}
public StreamingContext(SparkContext sparkContext, long durationMs)
/// <summary>
/// Initializes a new instance of StreamingContext with an existing SparkContext
/// </summary>
/// <param name="sparkContext">An existing SparkContext</param>
/// <param name="durationSeconds">the time interval at which streaming data will be divided into batches</param>
public StreamingContext(SparkContext sparkContext, int durationSeconds)
{
this.sparkContext = sparkContext;
streamingContextProxy = SparkCLREnvironment.SparkCLRProxy.CreateStreamingContext(sparkContext, durationMs);
streamingContextProxy = SparkCLREnvironment.SparkCLRProxy.CreateStreamingContext(sparkContext, durationSeconds);
}
/// <summary>
@ -79,11 +84,17 @@ namespace Microsoft.Spark.CSharp.Streaming
return new StreamingContext(SparkCLREnvironment.SparkCLRProxy.CreateStreamingContext(checkpointPath));
}
/// <summary>
/// Start the execution of the streams.
/// </summary>
public void Start()
{
streamingContextProxy.Start();
}
/// <summary>
/// Stop the execution of the streams.
/// </summary>
public void Stop()
{
streamingContextProxy.Stop();
@ -95,10 +106,10 @@ namespace Microsoft.Spark.CSharp.Streaming
/// collection. This method allows the developer to specify how long to remember the RDDs (
/// if the developer wishes to query old data outside the DStream computation).
/// </summary>
/// <param name="durationMs">Minimum duration that each DStream should remember its RDDs</param>
public void Remember(long durationMs)
/// <param name="durationSeconds">Minimum duration that each DStream should remember its RDDs</param>
public void Remember(int durationSeconds)
{
streamingContextProxy.Remember(durationMs);
streamingContextProxy.Remember(durationSeconds);
}
/// <summary>
@ -152,10 +163,10 @@ namespace Microsoft.Spark.CSharp.Streaming
/// <summary>
/// Wait for the execution to stop.
/// </summary>
/// <param name="timeout">time to wait in seconds</param>
public void AwaitTerminationOrTimeout(int timeout)
/// <param name="timeout">time to wait in milliseconds</param>
public void AwaitTerminationOrTimeout(long timeout)
{
streamingContextProxy.AwaitTermination(timeout);
streamingContextProxy.AwaitTerminationOrTimeout(timeout);
}
/// <summary>
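
For clarity on the revised time units above, a short hedged sketch (`sparkContext` is assumed to exist): the constructor and `Remember` now take seconds, while `AwaitTerminationOrTimeout` takes milliseconds.

``` c#
var ssc = new StreamingContext(sparkContext, 10);   // 10-second batch interval
ssc.Remember(600);                                  // remember RDDs for 10 minutes (seconds)
ssc.Start();
ssc.AwaitTerminationOrTimeout(30000);               // wait up to 30 seconds (milliseconds)
ssc.Stop();
```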

View file

@ -2,7 +2,7 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
##<center><H1><font color="darkorchid4">SparkCLR API Documentation<!--xsl:value-of select="$AssemblyName"/--></font></H1></center>
##<center><H1><font color="darkorchid4">Mobius API Documentation<!--xsl:value-of select="$AssemblyName"/--></font></H1></center>
<xsl:apply-templates select="//member[contains(@name,'T:') and not(contains(@name,'Helper')) and not(contains(@name,'Wrapper')) and not(contains(@name,'Configuration')) and not(contains(@name,'Proxy')) and not(contains(@name,'Interop')) and not(contains(@name,'Services'))]"/>
</xsl:template>

File diff not shown because it is too large

File diff hidden because one or more lines are too long

File diff hidden because one or more lines are too long

View file

@ -1,21 +1,14 @@
using System;
using System.IO;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net;
using System.Net.Sockets;
using System.Runtime.Serialization.Formatters.Binary;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Interop.Ipc;
using NUnit.Framework;
using Moq;
using AdapterTest.Mocks;
using Microsoft.Spark.CSharp.Network;
namespace AdapterTest
{
@ -27,7 +20,7 @@ namespace AdapterTest
public class AccumulatorTest
{
private SparkContext sc;
private Socket sock;
private ISocketWrapper sock;
[SetUp]
@ -38,7 +31,7 @@ namespace AdapterTest
// get accumulator server port and connect to accumulator server
int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort;
sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
sock = SocketFactory.CreateSocket();
sock.Connect(IPAddress.Loopback, serverPort);
}
@ -49,29 +42,31 @@ namespace AdapterTest
try
{
using (var s = new NetworkStream(sock))
using (var s = sock.GetStream())
{
int numUpdates = 0;
SerDe.Write(s, numUpdates);
}
sock.Close();
}
catch
{
// do nothing here
}
finally
{
sock.Close();
}
}
/// <summary>
/// test when no errors, accumuator server receives data as expected and exit with 0
/// test that, when there are no errors, the accumulator server receives data as expected and exits with 0
/// </summary>
[Test]
public void TestAccumuatorSuccess()
{
Accumulator<int> accumulator = sc.Accumulator<int>(0);
using (var s = new NetworkStream(sock))
using (var s = sock.GetStream())
{
// write numUpdates
int numUpdates = 1;
@ -102,7 +97,7 @@ namespace AdapterTest
[Test]
public void TestUndefinedAccumuator()
{
using (var s = new NetworkStream(sock))
using (var s = sock.GetStream())
{
// write numUpdates
int numUpdates = 1;

View file

@ -72,6 +72,7 @@
<Compile Include="DataFrameNaFunctionsTest.cs" />
<Compile Include="DataFrameReaderTest.cs" />
<Compile Include="DataFrameWriterTest.cs" />
<Compile Include="EventHubsUtilsTest.cs" />
<Compile Include="JsonSerDeTest.cs" />
<Compile Include="FunctionsTest.cs" />
<Compile Include="Mocks\MockDataFrameReaderProxy.cs" />
@ -81,6 +82,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="RowTest.cs" />
<Compile Include="SerDeTest.cs" />
<Compile Include="HiveContextTest.cs" />
<Compile Include="StatusTrackerTest.cs" />
<Compile Include="TestWithMoqDemo.cs" />
<Compile Include="Mocks\MockStructTypeProxy.cs" />
@ -107,12 +109,17 @@
<Compile Include="ComparableRDDTest.cs" />
<Compile Include="DoubleRDDTest.cs" />
<Compile Include="UserDefinedFunctionTest.cs" />
<Compile Include="WeakObjectManagerTest.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Adapter\Microsoft.Spark.CSharp\Adapter.csproj">
<Project>{ce999a96-f42b-4e80-b208-709d7f49a77c}</Project>
<Name>Adapter</Name>
</ProjectReference>
<ProjectReference Include="..\Tests.Common\Tests.Common.csproj">
<Project>{e4479c4c-e106-4b90-bf0c-319561cea9c4}</Project>
<Name>Tests.Common</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup />
<ItemGroup>

View file

@ -108,7 +108,8 @@ namespace AdapterTest
// worker side operations
Broadcast<int> broadcastVarInWorker = CreateBroadcastVarInWorker(expectedValue, out bid, out dumpPath);
Broadcast.broadcastRegistry.Remove(bid);
Broadcast bc;
Broadcast.broadcastRegistry.TryRemove(bid, out bc);
// assert
Assert.Throws<ArgumentException>(() => { var broadcastValueInWorker = broadcastVarInWorker.Value; });

View file

@ -195,6 +195,24 @@ namespace AdapterTest
mockColumnProxy.Verify(m => m.BinOp("bitwiseXOR", column2.ColumnProxy), Times.Once);
}
[Test]
public void TestColumnGetHashCode()
{
var column1 = new Column(null);
Assert.AreEqual(0, column1.GetHashCode());
var column2 = new Column(mockColumnProxy.Object);
Assert.AreNotEqual(0, column2.GetHashCode());
}
[Test]
public void TestColumnEquals()
{
var column1 = new Column(mockColumnProxy.Object);
var column2 = new Column(mockColumnProxy.Object);
Assert.IsTrue(column1.Equals(column2));
}
[Test]
public void TestColumnLike()
{

View file

@ -4,6 +4,7 @@ using AdapterTest.Mocks;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop.Ipc;
using NUnit.Framework;
using System.Linq;
namespace AdapterTest
{
@ -39,6 +40,12 @@ namespace AdapterTest
Assert.AreEqual(2, taken.Length);
Assert.AreEqual("brown", taken[0]);
Assert.AreEqual("dog", taken[1]);
taken = words.Distinct().TakeOrdered(2, x => new string(x.ToCharArray().Reverse().ToArray()));
Array.Sort(taken, StringComparer.Ordinal);
Assert.AreEqual(2, taken.Length);
Assert.AreEqual("The", taken[0]);
Assert.AreEqual("the", taken[1]);
}
[Test]

View file

@ -4,9 +4,13 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using AdapterTest.Mocks;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Streaming;
using Moq;
using NUnit.Framework;
namespace AdapterTest
@ -17,7 +21,7 @@ namespace AdapterTest
[Test]
public void TestDStreamMapReduce()
{
var ssc = new StreamingContext(new SparkContext("", ""), 1000);
var ssc = new StreamingContext(new SparkContext("", ""), 1);
Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));
var lines = ssc.TextFileStream(Path.GetTempPath());
@ -27,7 +31,8 @@ namespace AdapterTest
words.Slice(DateTime.MinValue, DateTime.MaxValue);
words.Cache();
words.Checkpoint(1000);
words.Checkpoint(1);
words.Window(1, 1);
words.Count().ForeachRDD((time, rdd) =>
{
@ -78,7 +83,7 @@ namespace AdapterTest
[Test]
public void TestDStreamTransform()
{
var ssc = new StreamingContext(new SparkContext("", ""), 1000);
var ssc = new StreamingContext(new SparkContext("", ""), 1);
Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));
var lines = ssc.TextFileStream(Path.GetTempPath());
@ -134,7 +139,7 @@ namespace AdapterTest
[Test]
public void TestDStreamJoin()
{
var ssc = new StreamingContext(new SparkContext("", ""), 1000);
var ssc = new StreamingContext(new SparkContext("", ""), 1);
Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));
var lines = ssc.TextFileStream(Path.GetTempPath());
@ -241,7 +246,7 @@ namespace AdapterTest
[Test]
public void TestDStreamUpdateStateByKey()
{
var ssc = new StreamingContext(new SparkContext("", ""), 1000);
var ssc = new StreamingContext(new SparkContext("", ""), 1);
Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));
var lines = ssc.TextFileStream(Path.GetTempPath());
@ -267,8 +272,23 @@ namespace AdapterTest
// disable pipelining to UpdateStateByKey, which relies on checkpointing that the mock proxy doesn't support
pairs.Cache();
var state = pairs.UpdateStateByKey<string, int, int>((v, s) => s + (v as List<int>).Count);
var initialStateRdd = ssc.SparkContext.Parallelize(new[] { "AAA" }).Map( w => new KeyValuePair<string, int>("AAA", 22));
var state = pairs.UpdateStateByKey<string, int, int>((v, s) => s + (v as List<int>).Count, initialStateRdd);
state.ForeachRDD((time, rdd) =>
{
var taken = rdd.Collect();
Assert.AreEqual(taken.Length, 10);
foreach (object record in taken)
{
KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
Assert.AreEqual(countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22, countByWord.Value);
}
});
// test when initialStateRdd is not provided
var state2 = pairs.UpdateStateByKey<string, int, int>((v, s) => s + (v as List<int>).Count);
state2.ForeachRDD((time, rdd) =>
{
var taken = rdd.Collect();
Assert.AreEqual(taken.Length, 9);
@ -276,9 +296,146 @@ namespace AdapterTest
foreach (object record in taken)
{
KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 24 : 23);
Assert.AreEqual(countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22, countByWord.Value);
}
});
}
[Test]
public void TestDStreamMapWithState()
{
var mapwithStateDStreamProxy = new Mock<IDStreamProxy>();
var streamingContextProxy = new Mock<IStreamingContextProxy>();
streamingContextProxy.Setup(p =>
p.CreateCSharpStateDStream(It.IsAny<IDStreamProxy>(), It.IsAny<byte[]>(), It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()))
.Returns(mapwithStateDStreamProxy.Object);
var sparkContextProxy = new Mock<ISparkContextProxy>();
var sparkConfProxy = new Mock<ISparkConfProxy>();
var sparkClrProxy = new Mock<ISparkCLRProxy>();
sparkClrProxy.Setup(p => p.StreamingContextProxy).Returns(streamingContextProxy.Object);
sparkClrProxy.Setup(p => p.SparkContextProxy).Returns(sparkContextProxy.Object);
sparkClrProxy.Setup(p => p.CreateSparkContext(It.IsAny<ISparkConfProxy>())).Returns(sparkContextProxy.Object);
sparkClrProxy.Setup(p => p.CreateSparkConf(It.IsAny<bool>())).Returns(sparkConfProxy.Object);
// reset SparkCLRProxy after the test completes
var originalSparkCLRProxy = SparkCLREnvironment.SparkCLRProxy;
try
{
SparkCLREnvironment.SparkCLRProxy = sparkClrProxy.Object;
var sparkConf = new SparkConf(false);
var ssc = new StreamingContext(new SparkContext(sparkContextProxy.Object, sparkConf), 10);
var dstreamProxy = new Mock<IDStreamProxy>();
var pairDStream = new DStream<KeyValuePair<string, int>>(dstreamProxy.Object, ssc);
var stateSpec = new StateSpec<string, int, int, int>((k, v, s) => v);
var stateDStream = pairDStream.MapWithState(stateSpec);
var snapshotDStream = stateDStream.StateSnapshots();
Assert.IsNotNull(stateDStream);
Assert.IsNotNull(snapshotDStream);
}
finally
{
SparkCLREnvironment.SparkCLRProxy = originalSparkCLRProxy;
}
}
[Test]
public void TestDStreamMapWithStateMapWithStateHelper()
{
// test when initialStateRdd is null
var stateSpec = new StateSpec<string, int, int, int>((k, v, s) => v).NumPartitions(2).Timeout(TimeSpan.FromSeconds(100));
var helper = new MapWithStateHelper<string, int, int, int>((t, rdd) => rdd, stateSpec);
var sparkContextProxy = new Mock<ISparkContextProxy>();
var sc = new SparkContext(sparkContextProxy.Object, null);
var pairwiseRddProxy = new Mock<IRDDProxy>();
sparkContextProxy.Setup(p => p.CreatePairwiseRDD(It.IsAny<IRDDProxy>(), It.IsAny<int>(), It.IsAny<long>())).Returns(pairwiseRddProxy.Object);
var pipelinedRddProxy = new Mock<IRDDProxy>();
pipelinedRddProxy.Setup(p => p.Union(It.IsAny<IRDDProxy>())).Returns(new Mock<IRDDProxy>().Object);
sparkContextProxy.Setup(p =>
p.CreateCSharpRdd(It.IsAny<IRDDProxy>(), It.IsAny<byte[]>(), It.IsAny<Dictionary<string, string>>(), It.IsAny<List<string>>(), It.IsAny<bool>(), It.IsAny<List<Broadcast>>(), It.IsAny<List<byte[]>>()))
.Returns(pipelinedRddProxy.Object);
var valueRddProxy = new Mock<IRDDProxy>();
var valuesRdd = new RDD<dynamic>(valueRddProxy.Object, sc);
var resultRdd = helper.Execute(DateTime.UtcNow.Millisecond, null, valuesRdd);
Assert.IsNotNull(resultRdd);
// test when initialStateRdd is not null
var initialStateRdd = new RDD<KeyValuePair<string, int>>(new Mock<IRDDProxy>().Object, null);
var stateSpec2 = new StateSpec<string, int, int, int>((k, v, s) => v).InitialState(initialStateRdd).NumPartitions(2);
var helper2 = new MapWithStateHelper<string, int, int, int>((t, rdd) => rdd, stateSpec2);
var resultRdd2 = helper2.Execute(DateTime.UtcNow.Millisecond, null, valuesRdd);
Assert.IsNotNull(resultRdd2);
}
[Test]
public void TestDStreamMapWithStateUpdateStateHelper()
{
var ticks = DateTime.UtcNow.Ticks;
var helper = new UpdateStateHelper<string, int, int, int>(
(k, v, state) =>
{
if (v < 0 && state.Exists())
{
state.Remove();
}
else if(!state.IsTimingOut())
{
state.Update(v + state.Get());
}
return v;
},
ticks, true, TimeSpan.FromSeconds(10));
var input = new dynamic[4];
var preStateRddRecord = new MapWithStateRDDRecord<string, int, int>(ticks - TimeSpan.FromSeconds(2).Ticks, new [] { new KeyValuePair<string, int>("1", 1), new KeyValuePair<string, int>("2", 2)});
preStateRddRecord.stateMap.Add("expired", new KeyedState<int>(0, ticks - TimeSpan.FromSeconds(60).Ticks));
input[0] = preStateRddRecord;
input[1] = new KeyValuePair<string, int>("1", -1);
input[2] = new KeyValuePair<string, int>("2", 2);
input[3] = new KeyValuePair<string, int>("3", 3);
var result = helper.Execute(1, input).GetEnumerator();
Assert.IsNotNull(result);
Assert.IsTrue(result.MoveNext());
MapWithStateRDDRecord<string, int, int> stateRddRecord = result.Current;
Assert.IsNotNull(stateRddRecord);
Assert.AreEqual(stateRddRecord.mappedData.Count, 4); // timed-out record also appears in the returned results
Assert.AreEqual(stateRddRecord.stateMap.Count, 2);
}
[Test]
public void TestConstantInputDStream()
{
var sc = new SparkContext("", "");
var rdd = sc.Parallelize(Enumerable.Range(0, 10), 1);
var ssc = new StreamingContext(sc, 1);
// test when rdd is null
Assert.Throws<ArgumentNullException>(() => new ConstantInputDStream<int>(null, ssc));
var constantInputDStream = new ConstantInputDStream<int>(rdd, ssc);
Assert.IsNotNull(constantInputDStream);
Assert.AreEqual(ssc, constantInputDStream.streamingContext);
}
}
}
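
Outside the test harness, the ConstantInputDStream exercised above could be used roughly as follows (a sketch; `sc` and `ssc` are assumed to be an existing SparkContext and StreamingContext, and `System.Linq` is assumed for `Sum()`):

``` c#
var rdd = sc.Parallelize(Enumerable.Range(0, 10), 1);
var constantStream = new ConstantInputDStream<int>(rdd, ssc);   // emits the same RDD in every batch

constantStream.ForeachRDD(batch =>
    Console.WriteLine("sum per batch: " + batch.Collect().Sum()));
```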

View file

@ -42,6 +42,13 @@ namespace AdapterTest
public void TestDropWithAny()
{
// arrange
const string columnName = "column1";
var mockSchemaProxy = new Mock<IStructTypeProxy>();
var mockFieldProxy = new Mock<IStructFieldProxy>();
mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockSchemaProxy.Object);
mockSchemaProxy.Setup(m => m.GetStructTypeFields()).Returns(new List<IStructFieldProxy> { mockFieldProxy.Object });
mockFieldProxy.Setup(m => m.GetStructFieldName()).Returns(columnName);
var sparkContext = new SparkContext("", "");
mockDataFrameNaFunctionsProxy.Setup(m => m.Drop(It.IsAny<int>(), It.IsAny<string[]>())).Returns(mockDataFrameProxy.Object);
@ -50,12 +57,21 @@ namespace AdapterTest
// act
var cols = new[] { "col1", "col2" };
var df = f.Drop("any", cols);
var df1 = f.Drop("any", cols);
var df2 = f.Drop();
var df3 = f.Drop("any");
// verify
Assert.IsNotNull(df);
Assert.AreEqual(df.DataFrameProxy, dataFrame.DataFrameProxy);
Assert.IsNotNull(df1);
Assert.AreEqual(df1.DataFrameProxy, dataFrame.DataFrameProxy);
mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(cols.Length, cols), Times.Once);
Assert.IsNotNull(df2);
Assert.AreEqual(df2.DataFrameProxy, dataFrame.DataFrameProxy);
Assert.IsNotNull(df3);
Assert.AreEqual(df3.DataFrameProxy, dataFrame.DataFrameProxy);
mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(1, new[] { columnName }), Times.Exactly(2));
}
[Test]
@ -106,6 +122,29 @@ namespace AdapterTest
mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(It.IsAny<int>(), It.IsAny<string[]>()), Times.Never);
}
[Test]
public void TestDropWithMinNonNulls()
{
const string columnName = "column1";
var mockSchemaProxy = new Mock<IStructTypeProxy>();
var mockFieldProxy = new Mock<IStructFieldProxy>();
mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockSchemaProxy.Object);
mockSchemaProxy.Setup(m => m.GetStructTypeFields()).Returns(new List<IStructFieldProxy> { mockFieldProxy.Object });
mockFieldProxy.Setup(m => m.GetStructFieldName()).Returns(columnName);
var sparkContext = new SparkContext("", "");
mockDataFrameNaFunctionsProxy.Setup(m => m.Drop(It.IsAny<int>(), It.IsAny<string[]>())).Returns(mockDataFrameProxy.Object);
var dataFrame = new DataFrame(mockDataFrameProxy.Object, sparkContext);
var f = new DataFrameNaFunctions(mockDataFrameNaFunctionsProxy.Object, dataFrame, sparkContext);
var df = f.Drop(20);
Assert.IsNotNull(df);
Assert.AreEqual(df.DataFrameProxy, dataFrame.DataFrameProxy);
Assert.AreNotSame(dataFrame, df);
mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(20, new[] { columnName }), Times.Once);
}
[Test]
public void TestFill()
{

View file

@ -44,6 +44,33 @@ namespace AdapterTest
SparkCLREnvironment.SparkCLRProxy = new MockSparkCLRProxy();
}
[Test]
public void TestRegisterTempTable()
{
mockDataFrameProxy.Setup(m => m.RegisterTempTable(It.IsAny<string>()));
var dataFrame = new DataFrame(mockDataFrameProxy.Object, null);
dataFrame.RegisterTempTable("TestTable");
mockDataFrameProxy.Verify(m => m.RegisterTempTable("TestTable"), Times.Once);
}
[Test]
public void TestDataFrameCount()
{
mockDataFrameProxy.Setup(m => m.Count()).Returns(1);
var dataFrame = new DataFrame(mockDataFrameProxy.Object, null);
Assert.AreEqual(1, dataFrame.Count());
mockDataFrameProxy.Verify(m => m.Count(), Times.Once);
}
[Test]
public void TestShow()
{
mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny<int>(), It.IsAny<bool>())).Returns("Show");
var dataFrame = new DataFrame(mockDataFrameProxy.Object, null);
dataFrame.Show();
mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once);
}
[Test]
public void TestDataFrameJoin()
{
@ -51,10 +78,54 @@ namespace AdapterTest
var dataFrame = sqlContext.Read().Json(@"c:\path\to\input.json");
var dataFrame2 = sqlContext.Read().Json(@"c:\path\to\input2.json");
var joinedDataFrame = dataFrame.Join(dataFrame2, "JoinCol");
var paramValuesToJoinMethod = (joinedDataFrame.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference as object[];
var paramValuesToSecondDataFrameJsonFileMethod = ((paramValuesToJoinMethod[0] as MockDataFrameProxy).mockDataFrameReference as object[]);
var paramValuesToJoinMethod = (joinedDataFrame.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference;
var paramValuesToSecondDataFrameJsonFileMethod = (paramValuesToJoinMethod[0] as MockDataFrameProxy).mockDataFrameReference;
Assert.AreEqual(@"c:\path\to\input2.json", paramValuesToSecondDataFrameJsonFileMethod[0]);
Assert.AreEqual("JoinCol", paramValuesToJoinMethod[1]);
var joinedDataFrame2 = dataFrame.Join(dataFrame2, new[] {"JoinCol1", "JoinCol2"});
var paramValuesToJoinMethod2 = (joinedDataFrame2.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference;
var paramValuesToSecondDataFrameJsonFileMethod2 = (paramValuesToJoinMethod2[0] as MockDataFrameProxy).mockDataFrameReference;
Assert.AreEqual(@"c:\path\to\input2.json", paramValuesToSecondDataFrameJsonFileMethod2[0]);
Assert.AreEqual("JoinCol1", (paramValuesToJoinMethod2[1] as string[])[0]);
Assert.AreEqual("JoinCol2", (paramValuesToJoinMethod2[1] as string[])[1]);
var mockColumnProxy = new Mock<IColumnProxy>().Object;
var mockColumn = new Column(mockColumnProxy);
var joinedDataFrame3 = dataFrame.Join(dataFrame2, mockColumn);
var paramValuesToJoinMethod3 = (joinedDataFrame3.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference;
var paramValuesToSecondDataFrameJsonFileMethod3 = (paramValuesToJoinMethod3[0] as MockDataFrameProxy).mockDataFrameReference;
Assert.AreEqual(@"c:\path\to\input2.json", paramValuesToSecondDataFrameJsonFileMethod3[0]);
Assert.AreEqual(mockColumnProxy, paramValuesToJoinMethod3[1]);
Assert.AreEqual(JoinType.Inner.Value, paramValuesToJoinMethod3[2]);
var joinedDataFrame4 = dataFrame.Join(dataFrame2, mockColumn, JoinType.Outer);
var paramValuesToJoinMethod4 = (joinedDataFrame4.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference;
var paramValuesToSecondDataFrameJsonFileMethod4 = (paramValuesToJoinMethod4[0] as MockDataFrameProxy).mockDataFrameReference;
Assert.AreEqual(@"c:\path\to\input2.json", paramValuesToSecondDataFrameJsonFileMethod4[0]);
Assert.AreEqual(mockColumnProxy, paramValuesToJoinMethod4[1]);
Assert.AreEqual(JoinType.Outer.Value, paramValuesToJoinMethod4[2]);
var joinedDataFrame5 = dataFrame.Join(dataFrame2, mockColumn, JoinType.LeftOuter);
var paramValuesToJoinMethod5 = (joinedDataFrame5.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference;
var paramValuesToSecondDataFrameJsonFileMethod5 = (paramValuesToJoinMethod5[0] as MockDataFrameProxy).mockDataFrameReference;
Assert.AreEqual(@"c:\path\to\input2.json", paramValuesToSecondDataFrameJsonFileMethod5[0]);
Assert.AreEqual(mockColumnProxy, paramValuesToJoinMethod5[1]);
Assert.AreEqual(JoinType.LeftOuter.Value, paramValuesToJoinMethod5[2]);
var joinedDataFrame6 = dataFrame.Join(dataFrame2, mockColumn, JoinType.RightOuter);
var paramValuesToJoinMethod6 = (joinedDataFrame6.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference;
var paramValuesToSecondDataFrameJsonFileMethod6 = (paramValuesToJoinMethod6[0] as MockDataFrameProxy).mockDataFrameReference;
Assert.AreEqual(@"c:\path\to\input2.json", paramValuesToSecondDataFrameJsonFileMethod6[0]);
Assert.AreEqual(mockColumnProxy, paramValuesToJoinMethod6[1]);
Assert.AreEqual(JoinType.RightOuter.Value, paramValuesToJoinMethod6[2]);
var joinedDataFrame7 = dataFrame.Join(dataFrame2, mockColumn, JoinType.LeftSemi);
var paramValuesToJoinMethod7 = (joinedDataFrame7.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference;
var paramValuesToSecondDataFrameJsonFileMethod7 = (paramValuesToJoinMethod7[0] as MockDataFrameProxy).mockDataFrameReference;
Assert.AreEqual(@"c:\path\to\input2.json", paramValuesToSecondDataFrameJsonFileMethod7[0]);
Assert.AreEqual(mockColumnProxy, paramValuesToJoinMethod7[1]);
Assert.AreEqual(JoinType.LeftSemi.Value, paramValuesToJoinMethod7[2]);
}
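
The Join overloads exercised in the test above map to application code roughly as follows (a sketch; the file paths and column names are placeholders, and the column-expression form assumes Column's == operator accepts another Column):

``` c#
var orders = sqlContext.Read().Json(@"hdfs:///data/orders.json");
var users = sqlContext.Read().Json(@"hdfs:///data/users.json");

var j1 = orders.Join(users, "userId");                          // single join column
var j2 = orders.Join(users, new[] { "userId", "region" });      // multiple join columns
var j3 = orders.Join(users, orders["userId"] == users["id"],    // join expression + join type
                     JoinType.LeftOuter);
```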
@ -187,6 +258,48 @@ namespace AdapterTest
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame.DataFrameProxy);
}
[Test]
public void TestFillNa()
{
// Arrange
const string columnName = "column1";
var mockSchemaProxy = new Mock<IStructTypeProxy>();
var mockFieldProxy = new Mock<IStructFieldProxy>();
var expectedResultDataFrameProxy = new Mock<IDataFrameProxy>().Object;
mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockSchemaProxy.Object);
// dataframeNaFunctionsProxy
var dataFrameNaFunctionsProxy = new Mock<IDataFrameNaFunctionsProxy>();
dataFrameNaFunctionsProxy.Setup(d => d.Fill(It.IsAny<double>(), It.IsAny<string[]>())).Returns(expectedResultDataFrameProxy);
dataFrameNaFunctionsProxy.Setup(d => d.Fill(It.IsAny<string>(), It.IsAny<string[]>())).Returns(expectedResultDataFrameProxy);
dataFrameNaFunctionsProxy.Setup(d => d.Fill(It.IsAny<Dictionary<string, object>>())).Returns(expectedResultDataFrameProxy);
mockDataFrameProxy.Setup(m => m.Na()).Returns(dataFrameNaFunctionsProxy.Object);
mockSchemaProxy.Setup(m => m.GetStructTypeFields()).Returns(new List<IStructFieldProxy> { mockFieldProxy.Object });
mockFieldProxy.Setup(m => m.GetStructFieldName()).Returns(columnName);
var sc = new SparkContext(null);
var dict = new Dictionary<string, object> {{columnName, 1}};
// Act
var originalDataFrame = new DataFrame(mockDataFrameProxy.Object, sc);
var actualResultDataFrame1 = originalDataFrame.FillNa(1);
var actualResultDataFrame2 = originalDataFrame.FillNa("1", new[] {columnName});
var actualResultDataFrame3 = originalDataFrame.FillNa(dict);
// Assert
// assert Fill on the proxy was invoked with correct parameters
dataFrameNaFunctionsProxy.Verify(m => m.Fill(1, It.Is<string[]>(subset => subset.Length == 1 &&
subset.Contains(columnName))));
dataFrameNaFunctionsProxy.Verify(m => m.Fill("1", It.Is<string[]>(subset => subset.Length == 1 &&
subset.Contains(columnName))));
dataFrameNaFunctionsProxy.Verify(m => m.Fill(dict));
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame1.DataFrameProxy);
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame2.DataFrameProxy);
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame3.DataFrameProxy);
}
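For reference, the three FillNa overloads verified above surface DataFrameNaFunctions.Fill; a minimal usage sketch follows, where `df` stands for any existing DataFrame and the "age"/"name" columns are hypothetical:

``` c#
// Sketch only: df is an existing DataFrame; "age" (numeric) and "name" (string) are placeholder columns.
DataFrame noNullAges = df.FillNa(0);                               // replace nulls in all numeric columns
DataFrame noNullNames = df.FillNa("unknown", new[] { "name" });    // replace nulls only in "name"
DataFrame filled = df.FillNa(new Dictionary<string, object>        // per-column replacement values
{
    { "age", 0 },
    { "name", "unknown" }
});
```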
[Test]
public void TestDropDuplicates()
{
@ -352,7 +465,7 @@ namespace AdapterTest
}
[Test]
public void TestSort_ColumnNames()
public void TestSort()
{
// Arrange
const string columnName = "column1";
@ -374,6 +487,28 @@ namespace AdapterTest
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrameProxy.DataFrameProxy);
}
[Test]
public void TestSortWithinPartitions()
{
// Arrange
const string columnName = "column1";
var expectedResultDataFrameProxy = new Mock<IDataFrameProxy>().Object;
var mockColumnProxy = new Mock<IColumnProxy>();
var mockSortedColumnProxy = new Mock<IColumnProxy>();
mockColumnProxy.Setup(m => m.UnaryOp(It.IsAny<string>())).Returns(mockSortedColumnProxy.Object);
mockDataFrameProxy.Setup(m => m.GetColumn(It.IsAny<string>())).Returns(mockColumnProxy.Object);
mockDataFrameProxy.Setup(m => m.SortWithinPartitions(It.IsAny<IColumnProxy[]>())).Returns(expectedResultDataFrameProxy);
var sc = new SparkContext(null);
// Act
var originalDataFrame = new DataFrame(mockDataFrameProxy.Object, sc);
var actualResultDataFrameProxy = originalDataFrame.SortWithinPartitions(new[] { columnName });
// Assert
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrameProxy.DataFrameProxy);
}
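SortWithinPartitions orders rows inside each partition without imposing a global ordering, which is what this test drives through the proxy. A hedged sketch of the column-name overload (column name hypothetical):

``` c#
// Sketch only: rows are ordered by "timestamp" within each partition; no ordering across partitions is implied.
DataFrame locallySorted = df.SortWithinPartitions(new[] { "timestamp" });
```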
[Test]
public void TestAlias()
{
@ -544,6 +679,30 @@ namespace AdapterTest
mockDataFrameProxy.Verify(m => m.Repartition(numPartitions), Times.Once());
}
[Test]
public void TestRepartition2()
{
// arrange
mockDataFrameProxy.Setup(m => m.Repartition(It.IsAny<int>(), It.IsAny<IColumnProxy[]>()));
var sc = new SparkContext(null);
var dataFrame = new DataFrame(mockDataFrameProxy.Object, sc);
const int numPartitions = 5;
IColumnProxy mockColumn1Proxy = new Mock<IColumnProxy>().Object;
Column mockColumn = new Column(mockColumn1Proxy);
// act
dataFrame.Repartition(new[] { mockColumn }, numPartitions);
// assert
mockDataFrameProxy.Verify(m => m.Repartition(numPartitions, new[] { mockColumn1Proxy }), Times.Once());
// act
dataFrame.Repartition(new[] { mockColumn });
// assert
mockDataFrameProxy.Verify(m => m.Repartition(new[] { mockColumn1Proxy }), Times.Once());
}
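The two Repartition overloads exercised here partition by columns, optionally with an explicit partition count. A sketch, assuming a hypothetical "dc" column, the DataFrame column indexer, and that these overloads return the repartitioned DataFrame like Repartition(int) does:

``` c#
// Sketch only: "dc" is a placeholder column; the second call lets Spark choose the partition count.
Column dc = df["dc"];
DataFrame byDcInFive = df.Repartition(new[] { dc }, 5);
DataFrame byDc = df.Repartition(new[] { dc });
```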
[Test]
public void TestSample()
{
@ -968,6 +1127,60 @@ namespace AdapterTest
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame.DataFrameProxy);
}
[Test]
public void TestSelect_ColumnName()
{
var expectedResultDataFrameProxy = new Mock<IDataFrameProxy>().Object;
mockDataFrameProxy.Setup(m => m.Select(It.IsAny<string>(), It.IsAny<string[]>())).Returns(expectedResultDataFrameProxy);
var sc = new SparkContext(null);
const string column1Name = "colName1";
const string column2Name = "colName2";
// Act
var originalDataFrame = new DataFrame(mockDataFrameProxy.Object, sc);
var actualResultDataFrame = originalDataFrame.Select(column1Name, column2Name);
// Assert
mockDataFrameProxy.Verify(m => m.Select(column1Name, new [] { column2Name } ));
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame.DataFrameProxy);
}
[Test]
public void TestSelectExpr()
{
var expectedResultDataFrameProxy = new Mock<IDataFrameProxy>().Object;
mockDataFrameProxy.Setup(m => m.SelectExpr(It.IsAny<string[]>())).Returns(expectedResultDataFrameProxy);
var sc = new SparkContext(null);
const string columnExpr = "colB as newName";
// Act
var originalDataFrame = new DataFrame(mockDataFrameProxy.Object, sc);
var actualResultDataFrame = originalDataFrame.SelectExpr(columnExpr);
// Assert
mockDataFrameProxy.Verify(m => m.SelectExpr(new[] { columnExpr }));
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame.DataFrameProxy);
}
[Test]
public void TestWhere()
{
var expectedResultDataFrameProxy = new Mock<IDataFrameProxy>().Object;
mockDataFrameProxy.Setup(m => m.Filter(It.IsAny<string>())).Returns(expectedResultDataFrameProxy);
var sc = new SparkContext(null);
const string condition = "Filter Condition";
// Act
var originalDataFrame = new DataFrame(mockDataFrameProxy.Object, sc);
var actualResultDataFrame = originalDataFrame.Where(condition);
// Assert
mockDataFrameProxy.Verify(m => m.Filter(condition));
Assert.AreEqual(expectedResultDataFrameProxy, actualResultDataFrame.DataFrameProxy);
}
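Taken together, the Select, SelectExpr and Where tests above correspond to the following usage pattern; the column names and expressions are placeholders, and as the last verification shows, Where simply delegates to the string-based Filter:

``` c#
// Sketch only: column names and expressions are placeholders.
DataFrame projected = df.Select("name", "age");                       // select columns by name
DataFrame computed  = df.SelectExpr("name", "age * 2 as doubleAge");  // SQL expression per selected column
DataFrame filtered  = df.Where("age > 21");                           // delegates to Filter(string)
```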
[Test]
public void TestWithColumn()
{
@ -1186,6 +1399,26 @@ namespace AdapterTest
#region GroupedDataTest
[Test]
public void TestAgg()
{
// Arrange
var expectedResultDataFrameProxy = new Mock<IDataFrameProxy>().Object;
var mockGroupedDataProxy = new Mock<IGroupedDataProxy>();
mockDataFrameProxy.Setup(m => m.GroupBy()).Returns(mockGroupedDataProxy.Object);
mockDataFrameProxy.Setup(m => m.Agg(It.IsAny<IGroupedDataProxy>(), It.IsAny<Dictionary<string, string>>())).Returns(expectedResultDataFrameProxy);
var sc = new SparkContext(null);
var columnNameAggFuncDic = new Dictionary<string, string> {{"name", "count"}};
// Act
var originalDataFrame = new DataFrame(mockDataFrameProxy.Object, sc);
var actualResult = originalDataFrame.Agg(columnNameAggFuncDic);
// Assert
mockDataFrameProxy.Verify(m => m.Agg(mockGroupedDataProxy.Object, columnNameAggFuncDic)); // assert Agg was invoked with correct parameters
Assert.AreEqual(expectedResultDataFrameProxy, actualResult.DataFrameProxy);
}
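The Agg test confirms that a column-name-to-aggregate-function dictionary first groups over the whole DataFrame (GroupBy with no columns) and then aggregates. Roughly, from user code (column names are placeholders):

``` c#
// Sketch only: counts "name" and averages "age" over the entire DataFrame.
DataFrame aggregated = df.Agg(new Dictionary<string, string>
{
    { "name", "count" },
    { "age", "avg" }
});
aggregated.Show();
```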
[Test]
public void TestCount()
{

View file

@ -0,0 +1,39 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Collections.Generic;
using AdapterTest.Mocks;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Streaming;
using Moq;
using NUnit.Framework;
namespace AdapterTest
{
[TestFixture]
public class EventHubsUtilsTest
{
[Test]
public void TestCreateUnionStream()
{
var streamingContextProxy = new Mock<IStreamingContextProxy>();
var mockDstreamProxy = new Mock<IDStreamProxy>().Object;
streamingContextProxy.Setup(
m => m.EventHubsUnionStream(It.IsAny<Dictionary<string, string>>(), It.IsAny<StorageLevelType>()))
.Returns(mockDstreamProxy);
var mockSparkClrProxy = new Mock<ISparkCLRProxy>();
mockSparkClrProxy.Setup(m => m.CreateStreamingContext(It.IsAny<SparkContext>(), It.IsAny<int>()))
.Returns(streamingContextProxy.Object);
SparkCLREnvironment.SparkCLRProxy = mockSparkClrProxy.Object;
var sparkContext = new SparkContext(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy, new SparkConf(new Mock<ISparkConfProxy>().Object));
var streamingContext = new StreamingContext(sparkContext, 123);
var dstream = EventHubsUtils.CreateUnionStream(streamingContext, new Dictionary<string, string>());
Assert.AreEqual(mockDstreamProxy, dstream.DStreamProxy);
}
}
}
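This test drives EventHubsUtils.CreateUnionStream entirely through mocked proxies; in an application it is attached to a real StreamingContext. A hedged sketch follows, assuming an existing sparkContext; the Event Hubs parameter keys and values are illustrative only (consult the Event Hubs receiver documentation for the supported settings), and the batch-duration argument uses whatever unit this version's StreamingContext constructor expects (the mocks in this commit moved from milliseconds to seconds):

``` c#
// Sketch only: parameter keys/values below are illustrative placeholders.
var ssc = new StreamingContext(sparkContext, 10);
var eventHubsParams = new Dictionary<string, string>
{
    { "eventhubs.namespace", "<namespace>" },
    { "eventhubs.name", "<eventhub-name>" },
    { "eventhubs.policyname", "<policy-name>" },
    { "eventhubs.policykey", "<policy-key>" }
};
var messages = EventHubsUtils.CreateUnionStream(ssc, eventHubsParams);
messages.ForeachRDD(rdd => Console.WriteLine("batch size: " + rdd.Count()));
ssc.Start();
ssc.AwaitTermination();
```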

View file

@ -565,25 +565,25 @@ namespace AdapterTest
{
mockSparkContextProxy.Setup(m => m.CreateWindowFunction(It.IsAny<string>()));
Functions.RowNumber();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("rowNumber"), Times.Once);
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("row_number"), Times.Once);
Functions.DenseRank();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("denseRank"), Times.Once);
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("dense_rank"), Times.Once);
Functions.Rank();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("rank"), Times.Once);
Functions.CumeDist();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("cumeDist"), Times.Once);
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("cume_dist"), Times.Once);
Functions.PercentRank();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("percentRank"), Times.Once);
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("percent_rank"), Times.Once);
Functions.MonotonicallyIncreasingId();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("monotonicallyIncreasingId"), Times.Once);
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("monotonically_increasing_id"), Times.Once);
Functions.SparkPartitionId();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("sparkPartitionId"), Times.Once);
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("spark_partition_id"), Times.Once);
Functions.Rand();
mockSparkContextProxy.Verify(m => m.CreateWindowFunction("rand"), Times.Once);
@ -594,6 +594,60 @@ namespace AdapterTest
#endregion
#region udf functions
[Test]
public void TestUdfFunction()
{
var mockUdfProxy = new Mock<IUDFProxy>();
mockUdfProxy.Setup(m => m.Apply(It.IsAny<IColumnProxy[]>()));
mockSparkContextProxy.Setup(m => m.CreateUserDefinedCSharpFunction(It.IsAny<string>(), It.IsAny<byte[]>(), It.IsAny<string>())).Returns(mockUdfProxy.Object);
Functions.Udf(() => 0).Invoke();
mockUdfProxy.Verify(m => m.Apply(new IColumnProxy[] { }), Times.Once);
var column1 = GeneratorColum();
Functions.Udf<int, int>(i => 1).Invoke(column1);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy }), Times.Once);
var column2 = GeneratorColum();
Functions.Udf<int, int, int>( (i1, i2) => 2).Invoke(column1, column2);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy }), Times.Once);
var column3 = GeneratorColum();
Functions.Udf<int, int, int, int>((i1, i2, i3) => 3).Invoke(column1, column2, column3);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy }), Times.Once);
var column4 = GeneratorColum();
Functions.Udf<int, int, int, int, int>((i1, i2, i3, i4) => 4).Invoke(column1, column2, column3, column4);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy, column4.ColumnProxy }), Times.Once);
var column5 = GeneratorColum();
Functions.Udf<int, int, int, int, int, int>((i1, i2, i3, i4, i5) => 5).Invoke(column1, column2, column3, column4, column5);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy, column4.ColumnProxy, column5.ColumnProxy }), Times.Once);
var column6 = GeneratorColum();
Functions.Udf<int, int, int, int, int, int, int>((i1, i2, i3, i4, i5, i6) => 6).Invoke(column1, column2, column3, column4, column5, column6);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy, column4.ColumnProxy, column5.ColumnProxy, column6.ColumnProxy }), Times.Once);
var column7 = GeneratorColum();
Functions.Udf<int, int, int, int, int, int, int, int>((i1, i2, i3, i4, i5, i6, i7) => 7).Invoke(column1, column2, column3, column4, column5, column6, column7);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy, column4.ColumnProxy, column5.ColumnProxy, column6.ColumnProxy, column7.ColumnProxy }), Times.Once);
var column8 = GeneratorColum();
Functions.Udf<int, int, int, int, int, int, int, int, int>((i1, i2, i3, i4, i5, i6, i7, i8) => 8).Invoke(column1, column2, column3, column4, column5, column6, column7, column8);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy, column4.ColumnProxy, column5.ColumnProxy, column6.ColumnProxy, column7.ColumnProxy, column8.ColumnProxy }), Times.Once);
var column9 = GeneratorColum();
Functions.Udf<int, int, int, int, int, int, int, int, int, int>((i1, i2, i3, i4, i5, i6, i7, i8, i9) => 9).Invoke(column1, column2, column3, column4, column5, column6, column7, column8, column9);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy, column4.ColumnProxy, column5.ColumnProxy, column6.ColumnProxy, column7.ColumnProxy, column8.ColumnProxy, column9.ColumnProxy }), Times.Once);
var column10 = GeneratorColum();
Functions.Udf<int, int, int, int, int, int, int, int, int, int, int>((i1, i2, i3, i4, i5, i6, i7, i8, i9, i10) => 10).Invoke(column1, column2, column3, column4, column5, column6, column7, column8, column9, column10);
mockUdfProxy.Verify(m => m.Apply(new[] { column1.ColumnProxy, column2.ColumnProxy, column3.ColumnProxy, column4.ColumnProxy, column5.ColumnProxy, column6.ColumnProxy, column7.ColumnProxy, column8.ColumnProxy, column9.ColumnProxy, column10.ColumnProxy }), Times.Once);
}
#endregion
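Functions.Udf wraps plain C# delegates (up to ten arguments, as the verifications above enumerate) into functions over Columns. A small sketch against an assumed DataFrame df with placeholder columns, assuming the invoked UDF yields a Column that can be fed to WithColumn:

``` c#
// Sketch only: df is an existing DataFrame; "name", "age" and "bonus" are placeholder columns.
var toUpper = Functions.Udf<string, string>(s => s.ToUpper());
var add = Functions.Udf<int, int, int>((x, y) => x + y);

DataFrame withUpperName = df.WithColumn("nameUpper", toUpper.Invoke(df["name"]));
DataFrame withTotal = df.WithColumn("total", add.Invoke(df["age"], df["bonus"]));
```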
private Column GeneratorColum()
{
Mock<IColumnProxy> mockColumnProxy = new Mock<IColumnProxy>();

View file

@ -0,0 +1,67 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using AdapterTest.Mocks;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop.Ipc;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Sql;
using NUnit.Framework;
using Moq;
using Microsoft.Spark.CSharp.Interop;
using Microsoft.Spark.CSharp.Proxy.Ipc;
using System.Collections.Generic;
namespace AdapterTest
{
/// <summary>
/// Validates interaction between HiveContext and its proxies
/// </summary>
[TestFixture]
public class HiveContextTest
{
private static Mock<ISqlContextProxy> mockSqlContextProxy;
[OneTimeSetUp]
public static void ClassInitialize()
{
mockSqlContextProxy = new Mock<ISqlContextProxy>();
}
[SetUp]
public void TestInitialize()
{
mockSqlContextProxy.Reset();
}
[TearDown]
public void TestCleanUp()
{
// Revert to the static mock class to avoid blocking other test methods that use it
SparkCLREnvironment.SparkCLRProxy = new MockSparkCLRProxy();
}
[Test]
public void TestHiveContextConstructor()
{
var hiveContext = new HiveContext(new SparkContext("", ""));
Assert.IsNotNull((hiveContext.SqlContextProxy as MockSqlContextProxy).mockSqlContextReference);
}
[Test]
public void TestHiveContextRefreshTable()
{
// arrange
var mockSparkContextProxy = new Mock<ISparkContextProxy>();
mockSqlContextProxy.Setup(m => m.RefreshTable(It.IsAny<string>()));
var hiveContext = new HiveContext(new SparkContext("", ""), mockSqlContextProxy.Object);
// act
hiveContext.RefreshTable("table");
// assert
mockSqlContextProxy.Verify(m => m.RefreshTable("table"));
}
}
}
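HiveContext layers Hive-specific operations such as RefreshTable on top of SqlContext. A hedged usage sketch, assuming a Hive-enabled Spark build, an existing sparkContext, a placeholder table name, and the Sql method inherited from SqlContext:

``` c#
// Sketch only: "web_logs" is a placeholder Hive table name.
var hiveContext = new HiveContext(sparkContext);
var errors = hiveContext.Sql("SELECT * FROM web_logs WHERE level = 'ERROR'");
errors.Show();
hiveContext.RefreshTable("web_logs");   // re-read table metadata after the underlying files change
```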

View file

@ -57,7 +57,7 @@ namespace AdapterTest.Mocks
{
}
public void Checkpoint(long intervalMs)
public void Checkpoint(int intervalSeconds)
{
}

View file

@ -146,12 +146,12 @@ namespace AdapterTest.Mocks
public IDataFrameProxy Join(IDataFrameProxy otherScalaDataFrameReference, string[] joinColumnNames)
{
throw new NotImplementedException();
return new MockDataFrameProxy(new object[] { otherScalaDataFrameReference, joinColumnNames }, SqlContextProxy);
}
public IDataFrameProxy Join(IDataFrameProxy otherScalaDataFrameReference, IColumnProxy scalaColumnReference, string joinType)
{
throw new NotImplementedException();
return new MockDataFrameProxy(new object[] { otherScalaDataFrameReference, scalaColumnReference, joinType }, SqlContextProxy);
}
public bool IsLocal
@ -329,5 +329,20 @@ namespace AdapterTest.Mocks
{
throw new NotImplementedException();
}
public IDataFrameProxy Repartition(int numPartitions, IColumnProxy[] columns)
{
throw new NotImplementedException();
}
public IDataFrameProxy Repartition(IColumnProxy[] columns)
{
throw new NotImplementedException();
}
public IDataFrameProxy SortWithinPartitions(IColumnProxy[] columns)
{
throw new NotImplementedException();
}
}
}

View file

@ -18,6 +18,7 @@ using NUnit.Framework;
namespace AdapterTest.Mocks
{
[Serializable]
internal class MockRddProxy : IRDDProxy
{
internal IEnumerable<dynamic> result;
@ -64,11 +65,6 @@ namespace AdapterTest.Mocks
return MockSparkContextProxy.RunJob(this);
}
public int PartitionLength()
{
return 1;
}
public void Cache()
{ }

View file

@ -58,7 +58,7 @@ namespace AdapterTest.Mocks
return false;
}
public IStreamingContextProxy CreateStreamingContext(SparkContext sparkContext, long durationMs)
public IStreamingContextProxy CreateStreamingContext(SparkContext sparkContext, int durationSeconds)
{
streamingContextProxy = new MockStreamingContextProxy();
return streamingContextProxy;

View file

@ -3,21 +3,16 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Runtime.CompilerServices;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Proxy;
using Microsoft.Spark.CSharp.Proxy.Ipc;
using Microsoft.Spark.CSharp.Interop.Ipc;
using NUnit.Framework;
using Microsoft.Spark.CSharp.Network;
namespace AdapterTest.Mocks
{
@ -33,7 +28,7 @@ namespace AdapterTest.Mocks
}
public void AddFile(string filePath)
{}
{ }
public IRDDProxy TextFile(string filePath, int minPartitions)
{
@ -84,14 +79,14 @@ namespace AdapterTest.Mocks
}
}
public IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions)
public IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions, long partitionFuncId)
{
return javaReferenceInByteArrayRdd;
}
public void SetLogLevel(string logLevel)
{}
{ }
public string Version
{
@ -204,13 +199,13 @@ namespace AdapterTest.Mocks
return ms.ToArray();
});
TcpListener listener = new TcpListener(IPAddress.Loopback, 0);
listener.Start();
var listener = SocketFactory.CreateSocket();
listener.Listen();
Task.Run(() =>
{
using (Socket socket = listener.AcceptSocket())
using (Stream ns = new NetworkStream(socket))
using (var socket = listener.Accept())
using (var ns = socket.GetStream())
{
foreach (var item in result)
{
@ -219,7 +214,7 @@ namespace AdapterTest.Mocks
}
}
});
return (listener.LocalEndpoint as IPEndPoint).Port;
return (listener.LocalEndPoint as IPEndPoint).Port;
}
public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
@ -282,6 +277,11 @@ namespace AdapterTest.Mocks
return new MockSqlContextProxy(this);
}
public ISqlContextProxy CreateHiveContext()
{
return new MockSqlContextProxy(this);
}
public IRDDProxy Parallelize(IEnumerable<byte[]> values, int numSlices)
{
return new MockRddProxy(null);

View file

@ -68,5 +68,80 @@ namespace AdapterTest.Mocks
{
throw new NotImplementedException();
}
public ISqlContextProxy NewSession()
{
throw new NotImplementedException();
}
public string GetConf(string key, string defaultValue)
{
throw new NotImplementedException();
}
public void SetConf(string key, string value)
{
throw new NotImplementedException();
}
public void RegisterDataFrameAsTable(IDataFrameProxy dataFrameProxy, string tableName)
{
throw new NotImplementedException();
}
public void DropTempTable(string tableName)
{
throw new NotImplementedException();
}
public IDataFrameProxy Table(string tableName)
{
throw new NotImplementedException();
}
public IDataFrameProxy Tables()
{
throw new NotImplementedException();
}
public IDataFrameProxy Tables(string databaseName)
{
throw new NotImplementedException();
}
public IEnumerable<string> TableNames()
{
throw new NotImplementedException();
}
public void CacheTable(string tableName)
{
throw new NotImplementedException();
}
public void UncacheTable(string tableName)
{
throw new NotImplementedException();
}
public void ClearCache()
{
throw new NotImplementedException();
}
public IEnumerable<string> TableNames(string databaseName)
{
throw new NotImplementedException();
}
public bool IsCached(string tableName)
{
throw new NotImplementedException();
}
public void RefreshTable(string tableName)
{
throw new NotImplementedException();
}
}
}

View file

@ -18,20 +18,16 @@ namespace AdapterTest.Mocks
{
private IFormatter formatter = new BinaryFormatter();
public void Start()
{
}
{}
public void Stop()
{
}
{}
public void Remember(long durationMs)
{
}
public void Remember(int durationSeconds)
{}
public void Checkpoint(string directory)
{
}
{}
public IDStreamProxy TextFileStream(string directory)
{
@ -53,6 +49,12 @@ namespace AdapterTest.Mocks
return new MockDStreamProxy();
}
public IDStreamProxy DirectKafkaStreamWithRepartition(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets,
int numPartitions, byte[] readFunc, string serializationMode)
{
return new MockDStreamProxy();
}
public IDStreamProxy Union(IDStreamProxy firstDStreams, IDStreamProxy[] otherDStreams)
{
return new MockDStreamProxy();
@ -62,7 +64,7 @@ namespace AdapterTest.Mocks
{
}
public void AwaitTermination(int timeout)
public void AwaitTerminationOrTimeout(long timeout)
{
}
@ -102,10 +104,24 @@ namespace AdapterTest.Mocks
{
Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> f = (Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)formatter.Deserialize(new MemoryStream(func));
RDD<dynamic> rdd = f(DateTime.UtcNow.Ticks,
new RDD<dynamic>((jdstream as MockDStreamProxy).rddProxy ?? new MockRddProxy(null), new SparkContext("", "")),
null,
new RDD<dynamic>((jdstream as MockDStreamProxy).rddProxy ?? new MockRddProxy(null), new SparkContext("", "")));
return new MockDStreamProxy(rdd.RddProxy);
}
public IDStreamProxy CreateConstantInputDStream(IRDDProxy rddProxy)
{
return new MockDStreamProxy();
}
public IDStreamProxy EventHubsUnionStream(Dictionary<string, string> eventHubsParams, StorageLevelType storageLevelType)
{
throw new NotImplementedException();
}
public IDStreamProxy KafkaMetaStream(byte[] metaParams, uint numPartitions)
{
throw new NotImplementedException();
}
}
}

View file

@ -1,9 +1,8 @@
using System;
using System.Collections.Generic;
using System.IO;
using AdapterTest.Mocks;
using System.Linq;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Interop.Ipc;
using NUnit.Framework;
namespace AdapterTest
@ -155,6 +154,30 @@ namespace AdapterTest
Assert.AreEqual(9, records.Length);
}
[Test]
public void TestPairRddPartitionBy()
{
Func<dynamic, int> partitionFunc = key => 1;
var rddPartitionBy = pairs.PartitionBy(3, partitionFunc);
Assert.AreEqual(new Partitioner(3, partitionFunc), rddPartitionBy.partitioner);
}
[Test]
public void TestPairRddSortByKey()
{
var expectedSortedRdd = pairs.Collect().OrderBy(kv => kv.Key, StringComparer.OrdinalIgnoreCase).ToArray();
var rddSortByKey = pairs.SortByKey(true, null, key => key.ToLowerInvariant()).Collect();
CollectionAssert.AreEqual(expectedSortedRdd, rddSortByKey);
}
[Test]
public void TestPairRddSortByKey2()
{
var expectedSortedRdd = pairs.Collect().OrderBy(kv => kv.Key, StringComparer.OrdinalIgnoreCase).ToArray();
var rddSortByKey = pairs.SortByKey(true, 1, key => key.ToLowerInvariant()).Collect();
CollectionAssert.AreEqual(expectedSortedRdd, rddSortByKey);
}
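The new pair-RDD tests above cover custom partitioning and key-function-based sorting. Roughly, from user code, where `pairs` stands for an RDD of KeyValuePair&lt;string, int&gt; built elsewhere and the partition/key functions are illustrative:

``` c#
// Sketch only: pairs is an existing RDD<KeyValuePair<string, int>>.
Func<dynamic, int> partitionFunc = key => key.ToString().Length % 3;   // route keys to one of 3 partitions
var partitioned = pairs.PartitionBy(3, partitionFunc);

// Ascending, case-insensitive sort by key; null leaves the partition count to the default.
var sortedPairs = pairs.SortByKey(true, null, key => key.ToLowerInvariant()).Collect();
```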
[Test]
public void TestPairRddProxy()
{

Some files were not shown because too many files have changed in this diff. Show more