added new F# example
* moved existing F# example to have all F# examples under a single folder * added copyright notice to existing F# examples * updated documentation
This commit is contained in:
Родитель
a2fe4f5dee
Коммит
da03e58dbe
|
@ -20,14 +20,18 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Pi", "Batch\pi\Pi.csproj",
|
|||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WordCount", "Batch\WordCount\WordCount.csproj", "{17E4C27F-8441-425A-B82B-23BA5E313CC4}"
|
||||
EndProject
|
||||
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "WordCountFSharp", "Batch\WordCountFSharp\WordCountFSharp.fsproj", "{17B63D32-EFC8-4EF5-831A-197A4FC29F06}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Kafka", "Streaming\Kafka\Kafka.csproj", "{8764EAAA-9D32-4549-A64F-C7C89B014EA6}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HiveDataFrame", "Sql\HiveDataFrame\HiveDataFrame.csproj", "{5C97498A-C4DB-43DD-86AD-4E50DEE8D405}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CassandraDataFrame", "Sql\CassandraDataFrame\CassandraDataFrame.csproj", "{9FCC75C4-347F-44E8-9B07-C5273066DF9C}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FSharpExamples", "FSharpExamples", "{57B99251-5489-49C1-AEBA-E4B07D3F7E47}"
|
||||
EndProject
|
||||
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "WordCountFSharp", "fsharp\WordCount\WordCountFSharp.fsproj", "{17B63D32-EFC8-4EF5-831A-197A4FC29F06}"
|
||||
EndProject
|
||||
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "JsonDataFrame", "fsharp\JsonDataFrame\JsonDataFrame.fsproj", "{27F88C0D-9D19-4709-8C88-FDB768BAD5AC}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
|
@ -58,10 +62,6 @@ Global
|
|||
{17E4C27F-8441-425A-B82B-23BA5E313CC4}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{17E4C27F-8441-425A-B82B-23BA5E313CC4}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{17E4C27F-8441-425A-B82B-23BA5E313CC4}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{8764EAAA-9D32-4549-A64F-C7C89B014EA6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{8764EAAA-9D32-4549-A64F-C7C89B014EA6}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{8764EAAA-9D32-4549-A64F-C7C89B014EA6}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
|
@ -74,6 +74,14 @@ Global
|
|||
{9FCC75C4-347F-44E8-9B07-C5273066DF9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{9FCC75C4-347F-44E8-9B07-C5273066DF9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{9FCC75C4-347F-44E8-9B07-C5273066DF9C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{27F88C0D-9D19-4709-8C88-FDB768BAD5AC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{27F88C0D-9D19-4709-8C88-FDB768BAD5AC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{27F88C0D-9D19-4709-8C88-FDB768BAD5AC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{27F88C0D-9D19-4709-8C88-FDB768BAD5AC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
@ -85,9 +93,10 @@ Global
|
|||
{DF840BFB-B3A3-493D-B667-4CF21ADBFCAD} = {6F90310A-2DA2-4E81-A062-8D8A9F47C25B}
|
||||
{913E6A56-9839-4379-8B3C-855BA9341663} = {AE001E84-471E-4D02-BDDE-40B85915CEAE}
|
||||
{17E4C27F-8441-425A-B82B-23BA5E313CC4} = {AE001E84-471E-4D02-BDDE-40B85915CEAE}
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06} = {AE001E84-471E-4D02-BDDE-40B85915CEAE}
|
||||
{8764EAAA-9D32-4549-A64F-C7C89B014EA6} = {6F90310A-2DA2-4E81-A062-8D8A9F47C25B}
|
||||
{5C97498A-C4DB-43DD-86AD-4E50DEE8D405} = {28600A86-E011-41C9-AB41-591580EDB9F1}
|
||||
{9FCC75C4-347F-44E8-9B07-C5273066DF9C} = {28600A86-E011-41C9-AB41-591580EDB9F1}
|
||||
{17B63D32-EFC8-4EF5-831A-197A4FC29F06} = {57B99251-5489-49C1-AEBA-E4B07D3F7E47}
|
||||
{27F88C0D-9D19-4709-8C88-FDB768BAD5AC} = {57B99251-5489-49C1-AEBA-E4B07D3F7E47}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
namespace JsonDataFrame.AssemblyInfo
|
||||
|
||||
open System.Reflection
|
||||
open System.Runtime.CompilerServices
|
||||
open System.Runtime.InteropServices
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[<assembly: AssemblyTitle("Mobius-FSharp-JsonDataFrame")>]
|
||||
[<assembly: AssemblyDescription("Mobius DataFrame example in FSharp.")>]
|
||||
[<assembly: AssemblyConfiguration("")>]
|
||||
[<assembly: AssemblyCompany("")>]
|
||||
[<assembly: AssemblyProduct("JsonDataFrame")>]
|
||||
[<assembly: AssemblyCopyright("Copyright © Microsoft Corporation 2016")>]
|
||||
[<assembly: AssemblyTrademark("")>]
|
||||
[<assembly: AssemblyCulture("")>]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[<assembly: ComVisible(false)>]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[<assembly: Guid("27f88c0d-9d19-4709-8c88-fdb768bad5ac")>]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Build and Revision Numbers
|
||||
// by using the '*' as shown below:
|
||||
// [<assembly: AssemblyVersion("1.0.*")>]
|
||||
[<assembly: AssemblyVersion("1.0.0.0")>]
|
||||
[<assembly: AssemblyFileVersion("1.0.0.0")>]
|
||||
|
||||
do
|
||||
()
|
|
@ -0,0 +1,101 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<SchemaVersion>2.0</SchemaVersion>
|
||||
<ProjectGuid>27f88c0d-9d19-4709-8c88-fdb768bad5ac</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<RootNamespace>JsonDataFrame</RootNamespace>
|
||||
<AssemblyName>JsonDataFrame</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
<TargetFSharpCoreVersion>4.4.0.0</TargetFSharpCoreVersion>
|
||||
<Name>JsonDataFrame</Name>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<Tailcalls>false</Tailcalls>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<WarningLevel>3</WarningLevel>
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DocumentationFile>bin\Debug\JsonDataFrame.XML</DocumentationFile>
|
||||
<Prefer32Bit>true</Prefer32Bit>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<Tailcalls>true</Tailcalls>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<WarningLevel>3</WarningLevel>
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DocumentationFile>bin\Release\JsonDataFrame.XML</DocumentationFile>
|
||||
<Prefer32Bit>true</Prefer32Bit>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<MinimumVisualStudioVersion Condition="'$(MinimumVisualStudioVersion)' == ''">11</MinimumVisualStudioVersion>
|
||||
</PropertyGroup>
|
||||
<Choose>
|
||||
<When Condition="'$(VisualStudioVersion)' == '11.0'">
|
||||
<PropertyGroup Condition="Exists('$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets')">
|
||||
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets</FSharpTargetsPath>
|
||||
</PropertyGroup>
|
||||
</When>
|
||||
<Otherwise>
|
||||
<PropertyGroup Condition="Exists('$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets')">
|
||||
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets</FSharpTargetsPath>
|
||||
</PropertyGroup>
|
||||
</Otherwise>
|
||||
</Choose>
|
||||
<Import Project="$(FSharpTargetsPath)" />
|
||||
<ItemGroup>
|
||||
<Compile Include="AssemblyInfo.fs" />
|
||||
<Compile Include="Program.fs" />
|
||||
<Content Include="..\..\App.config">
|
||||
<Link>App.config</Link>
|
||||
</Content>
|
||||
<Content Include="packages.config" />
|
||||
<None Include="data.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="CSharpWorker">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\CSharpWorker.exe</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="log4net">
|
||||
<HintPath>..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Spark.CSharp.Adapter">
|
||||
<HintPath>..\..\packages\Microsoft.SparkCLR.1.6.100\lib\net45\Microsoft.Spark.CSharp.Adapter.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="mscorlib" />
|
||||
<Reference Include="FSharp.Core, Version=$(TargetFSharpCoreVersion), Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json">
|
||||
<HintPath>..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Pyrolite">
|
||||
<HintPath>..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Razorvine.Serpent">
|
||||
<HintPath>..\..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Numerics" />
|
||||
</ItemGroup>
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
|
@ -0,0 +1,37 @@
|
|||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
open Microsoft.Spark.CSharp.Core
|
||||
open Microsoft.Spark.CSharp.Services
|
||||
open Microsoft.Spark.CSharp.Sql
|
||||
open System.Reflection
|
||||
open System.Collections.Generic
|
||||
|
||||
/// Entry point of the Mobius JSON DataFrame example.
///
/// Expects exactly one command-line argument: the path of the JSON file to load.
/// Returns 0 after a successful run; for any other argument count it prints a
/// usage message and returns 1.
[<EntryPoint>]
let main args =
    match args with
    | [| filePath |] ->

        let sparkContext = SparkContext(SparkConf().SetAppName "MobiusJsonDataFrame")

        // try/finally guarantees the Spark context is stopped even when one of
        // the steps below (reading the file, running the query) throws.
        try
            let sqlContext = SqlContext sparkContext

            //reading dataframe
            let dataframe = sqlContext.Read().Json(filePath)
            dataframe.ShowSchema()
            dataframe.Show()

            //using DataFrame API: keep only the name and state columns, then filter on state
            let filteredDf = dataframe.Select("name", "address.state").Where("state = 'California'")

            //using Spark SQL: query the filtered frame through a temporary table
            filteredDf.RegisterTempTable "temptable123"
            let countAsDf = sqlContext.Sql "SELECT * FROM temptable123 where name='Bill'"
            let countOfRows = countAsDf.Count()
            printfn "Count of rows with name='Bill' and State='California' = %d" countOfRows

            0
        finally
            sparkContext.Stop()
    | _ ->
        printfn "Usage: JsonDataFrame <file>"
        1
|
|
@ -0,0 +1,4 @@
|
|||
{"id":"123", "name":"Bill", "age":34, "address":{"city":"Columbus","state":"Ohio"}}
|
||||
{"id":"456", "name":"Steve", "age":14, "address":{"city":null, "state":"California"}}
|
||||
{ "id": "789", "name": "Bill", "age": 43, "address": { "city": "Seattle", "state": "Washington" } }
|
||||
{ "id": "531", "name": "Satya", "age": 46, "address": null }
|
|
@ -1,4 +1,7 @@
|
|||
namespace WordCountFSharp.AssemblyInfo
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
namespace WordCountFSharp.AssemblyInfo
|
||||
|
||||
open System.Reflection
|
||||
open System.Runtime.CompilerServices
|
|
@ -1,3 +1,6 @@
|
|||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
open Microsoft.Spark.CSharp.Core
|
||||
open Microsoft.Spark.CSharp.Services
|
||||
open System.Reflection
|
|
@ -0,0 +1,9 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="FSharp.Core" version="4.0.0.1" targetFramework="net45" />
|
||||
<package id="log4net" version="2.0.5" targetFramework="net45" />
|
||||
<package id="Microsoft.SparkCLR" version="1.6.100" targetFramework="net45" />
|
||||
<package id="Newtonsoft.Json" version="7.0.1" targetFramework="net45" />
|
||||
<package id="Razorvine.Pyrolite" version="4.10.0.0" targetFramework="net45" />
|
||||
<package id="Razorvine.Serpent" version="1.12.0.0" targetFramework="net45" />
|
||||
</packages>
|
|
@ -132,8 +132,8 @@ Instructions to run Mobius applications in Linux are available at [linux-instruc
|
|||
## Running Mobius Examples in Local Mode
|
||||
| Type | Examples |
|
||||
| ------------- |--------------|
|
||||
| Batch | <ul><li>[Pi](#pi-example-batch)</li><li>[Word Count](#wordcount-example-batch)</li></ul> |
|
||||
| SQL | <ul><li>[JDBC](#jdbc-example-sql)</li><li>[Spark-XML](#spark-xml-example-sql)</li><li>[Hive](#hive-example-sql)</li><li>[Cassandra](#cassandra-example-sql)</li></ul> |
|
||||
| Batch | <ul><li>[Pi](#pi-example-batch)</li><li>[Word Count](#wordcount-example-batch)</li><li>[Word Count (F#)](#wordcount-example---f-batch)</li></ul> |
|
||||
| SQL | <ul><li>[JDBC](#jdbc-example-sql)</li><li>[Spark-XML](#spark-xml-example-sql)</li><li>[Hive](#hive-example-sql)</li><li>[Cassandra](#cassandra-example-sql)</li><li>[JSON (F#)](#json-example---f-sql)</li></ul> |
|
||||
| Streaming | <ul><li>[Kafka](#kafka-example-streaming)</li><li>[EventHubs](#eventhubs-example-streaming)</li><li>[HDFS Word Count](#hdfswordcount-example-streaming)</li></ul> |
|
||||
|
||||
The following sample commands show how to run Mobius examples in local mode. Using the instructions above, these sample commands can be adapted to run in other modes.
|
||||
|
@ -144,7 +144,18 @@ The following sample commands show how to run Mobius examples in local mode. Usi
|
|||
Computes the _approximate_ value of Pi using two approaches and displays the value.
|
||||
|
||||
### WordCount Example (Batch)
|
||||
* Run `sparkclr-submit.cmd --exe SparkClrPi.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug <inputFile>`
|
||||
* Run `sparkclr-submit.cmd --exe SparkClrWordCount.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug <InputFilePath>`
|
||||
|
||||
`InputFilePath` should be in one of the following formats:
|
||||
* `hdfs://path/to/inputfile`
|
||||
* `file:///C:/path/to/inputfile`
|
||||
|
||||
### WordCount Example - F# (Batch)
|
||||
* Run `sparkclr-submit.cmd --exe WordCountFSharp.exe C:\Git\Mobius\examples\fsharp\WordCount\bin\Debug <InputFilePath>`
|
||||
|
||||
`InputFilePath` should be in one of the following formats:
|
||||
* `hdfs://path/to/inputfile`
|
||||
* `file:///C:/path/to/inputfile`
|
||||
|
||||
### JDBC Example (Sql)
|
||||
* Download a JDBC driver for the SQL Database you want to use
|
||||
|
@ -177,6 +188,15 @@ Reads data from a csv file, creates a Hive table and reads data from it
|
|||
|
||||
This sample reads data from a table, displays results in the console, performs filter on dataframe and writes results to another table
|
||||
|
||||
### JSON Example - F# (Sql)
|
||||
* Run `sparkclr-submit.cmd --exe JsonDataFrame.exe C:\Git\Mobius\examples\fsharp\JsonDataFrame\bin\Debug <InputFilePath>`
|
||||
|
||||
A file named `data.json` is available in the same location as `JsonDataFrame.exe` and may be used as the input when running the example.
|
||||
|
||||
`InputFilePath` should be in one of the following formats:
|
||||
* `hdfs://path/to/inputfile`
|
||||
* `file:///C:/path/to/inputfile`
|
||||
|
||||
### EventHubs Example (Streaming)
|
||||
* Get the following jar files
|
||||
* qpid-amqp-1-0-client-0.32.jar
|
||||
|
|
Загрузка…
Ссылка в новой задаче