Merge branch 'master' into dev

# Conflicts:
#	csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML
This commit is contained in:
tqin 2016-03-31 10:43:29 +08:00
Родитель 0fabe0c983 831b5cb0dd
Коммит 3d9191102c
8 изменённых файлов: 274 добавлений и 10 удалений

Просмотреть файл

@ -161,7 +161,7 @@ namespace Microsoft.Spark.CSharp.Configuration
/// <summary>
/// Configuration mode for debug mode
/// This configuration exists only to make SparkCLR development & debugging easier
/// This configuration exists only to make SparkCLR development and debugging easier
/// </summary>
private class SparkCLRDebugConfiguration : SparkCLRLocalConfiguration
{
@ -212,12 +212,30 @@ namespace Microsoft.Spark.CSharp.Configuration
}
}
/// <summary>
/// The running mode used by Configuration Service
/// </summary>
public enum RunMode
{
/// <summary>
/// Unknown running mode
/// </summary>
UNKNOWN,
DEBUG, //not a Spark mode but exists for dev debugging purpose
/// <summary>
/// Debug mode, not a Spark mode but exists for development debugging purposes
/// </summary>
DEBUG,
/// <summary>
/// Indicates service is running in local
/// </summary>
LOCAL,
/// <summary>
/// Indicates service is running in cluster
/// </summary>
CLUSTER,
/// <summary>
/// Indicates service is running in Yarn
/// </summary>
YARN
//MESOS //not currently supported
}

Просмотреть файл

@ -38,10 +38,23 @@ namespace Microsoft.Spark.CSharp.Core
[ThreadStatic] // Thread safe is needed when running in C# worker
internal static Dictionary<int, Accumulator> threadLocalAccumulatorRegistry = new Dictionary<int, Accumulator>();
/// <summary>
/// The identity of the accumulator
/// </summary>
protected int accumulatorId;
[NonSerialized] // When deserialized in C# worker, isDriver is false.
/// <summary>
/// Indicates whether the accumulator is on driver side.
/// When deserialized on worker side, isDriver is false by default.
/// </summary>
[NonSerialized]
protected bool isDriver = false;
}
/// <summary>
/// A generic version of <see cref="Accumulator"/> where the element type is specified by the driver program.
/// </summary>
/// <typeparam name="T">The type of element in the accumulator.</typeparam>
[Serializable]
public class Accumulator<T> : Accumulator
{
@ -49,6 +62,11 @@ namespace Microsoft.Spark.CSharp.Core
internal T value;
private readonly AccumulatorParam<T> accumulatorParam = new AccumulatorParam<T>();
/// <summary>
/// Initializes a new instance of the Accumulator class with a specified identity and a value.
/// </summary>
/// <param name="accumulatorId">The Identity of the accumulator</param>
/// <param name="value">The value of the accumulator</param>
public Accumulator(int accumulatorId, T value)
{
this.accumulatorId = accumulatorId;
@ -57,6 +75,10 @@ namespace Microsoft.Spark.CSharp.Core
accumulatorRegistry[accumulatorId] = this;
}
/// <summary>
/// Gets or sets the value of the accumulator; only usable in driver program
/// </summary>
/// <exception cref="ArgumentException"></exception>
public T Value
{
// Get the accumulator's value; only usable in driver program
@ -115,6 +137,10 @@ namespace Microsoft.Spark.CSharp.Core
return self;
}
/// <summary>
/// Creates and returns a string representation of the current accumulator
/// </summary>
/// <returns>A string representation of the current accumulator</returns>
public override string ToString()
{
return string.Format("Accumulator<id={0}, value={1}>", accumulatorId, value);

Просмотреть файл

@ -31,6 +31,9 @@ namespace Microsoft.Spark.CSharp.Core
[Serializable]
public class Broadcast
{
/// <summary>
/// A thread-safe static collection that is used to store registered broadcast objects.
/// </summary>
[NonSerialized]
public static ConcurrentDictionary<long, Broadcast> broadcastRegistry = new ConcurrentDictionary<long, Broadcast>();
[NonSerialized]
@ -38,6 +41,11 @@ namespace Microsoft.Spark.CSharp.Core
internal long broadcastId;
internal Broadcast() { }
/// <summary>
/// Initializes a new instance of Broadcast class with a specified path.
/// </summary>
/// <param name="path">The path to be set.</param>
public Broadcast(string path)
{
this.path = path;
@ -60,6 +68,11 @@ namespace Microsoft.Spark.CSharp.Core
}
}
}
/// <summary>
/// A generic version of <see cref="Broadcast"/> where the element can be specified.
/// </summary>
/// <typeparam name="T">The type of element in Broadcast</typeparam>
[Serializable]
public class Broadcast<T> : Broadcast
{

Просмотреть файл

@ -9,6 +9,9 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// Extra functions available on RDDs of Doubles through an implicit conversion.
/// </summary>
public static class DoubleRDDFunctions
{
/// <summary>

Просмотреть файл

@ -16,17 +16,32 @@ namespace Microsoft.Spark.CSharp.Core
private bool isDefined = false;
private T value;
/// <summary>
/// Initializes an instance of the Option class without any value.
/// </summary>
public Option()
{ }
/// <summary>
/// Initializes an instance of the Option class with a specific value.
/// </summary>
/// <param name="value">The value to be associated with the new instance.</param>
public Option(T value)
{
isDefined = true;
this.value = value;
}
/// <summary>
/// Indicates whether the option value is defined.
/// </summary>
public bool IsDefined { get { return isDefined; } }
/// <summary>
/// Returns the value of the option if Option.IsDefined is TRUE;
/// otherwise, throws an <see cref="ArgumentException"/>.
/// </summary>
/// <returns></returns>
public T GetValue()
{
if (isDefined) return value;

Просмотреть файл

@ -10,6 +10,10 @@ using System.Threading.Tasks;
namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
/// Extra functions available on RDDs of (key, value) pairs where the key is sortable through
/// a function to sort the key.
/// </summary>
public static class OrderedRDDFunctions
{

Просмотреть файл

@ -50,12 +50,47 @@
overridden here
</summary>
</member>
<!-- Badly formed XML comment ignored for member "T:Microsoft.Spark.CSharp.Configuration.ConfigurationService.SparkCLRDebugConfiguration" -->
<member name="T:Microsoft.Spark.CSharp.Configuration.ConfigurationService.SparkCLRDebugConfiguration">
<summary>
Configuration mode for debug mode
This configuration exists only to make SparkCLR development and debugging easier
</summary>
</member>
<member name="M:Microsoft.Spark.CSharp.Configuration.ConfigurationService.SparkCLRDebugConfiguration.GetCSharpWorkerExePath">
<summary>
The full path of the CSharp external backend worker process.
</summary>
</member>
<member name="T:Microsoft.Spark.CSharp.Configuration.RunMode">
<summary>
The running mode used by Configuration Service
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.UNKNOWN">
<summary>
Unknown running mode
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.DEBUG">
<summary>
Debug mode, not a Spark mode but exists for development debugging purposes
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.LOCAL">
<summary>
Indicates service is running in local
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.CLUSTER">
<summary>
Indicates service is running in cluster
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.YARN">
<summary>
Indicates service is running in Yarn
</summary>
</member>
<member name="T:Microsoft.Spark.CSharp.Core.Accumulator">
<summary>
A shared variable that can be accumulated, i.e., has a commutative and associative "add"
@ -71,6 +106,30 @@
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Core.Accumulator.accumulatorId">
<summary>
The identity of the accumulator
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Core.Accumulator.isDriver">
<summary>
Indicates whether the accumulator is on driver side.
When deserialized on worker side, isDriver is false by default.
</summary>
</member>
<member name="T:Microsoft.Spark.CSharp.Core.Accumulator`1">
<summary>
A generic version of <see cref="T:Microsoft.Spark.CSharp.Core.Accumulator"/> where the element type is specified by the driver program.
</summary>
<typeparam name="T">The type of element in the accumulator.</typeparam>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Accumulator`1.#ctor(System.Int32,`0)">
<summary>
Initializes a new instance of the Accumulator class with a specified identity and a value.
</summary>
<param name="accumulatorId">The Identity of the accumulator</param>
<param name="value">The value of the accumulator</param>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Accumulator`1.Add(`0)">
<summary>
Adds a term to this accumulator's value
@ -86,6 +145,18 @@
<param name="term"></param>
<returns></returns>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Accumulator`1.ToString">
<summary>
Creates and returns a string representation of the current accumulator
</summary>
<returns>A string representation of the current accumulator</returns>
</member>
<member name="P:Microsoft.Spark.CSharp.Core.Accumulator`1.Value">
<summary>
Gets or sets the value of the accumulator; only usable in driver program
</summary>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="T:Microsoft.Spark.CSharp.Core.AccumulatorParam`1">
<summary>
An AccumulatorParam that uses the + operators to add values. Designed for simple types
@ -131,6 +202,23 @@
</summary>
</member>
<member name="F:Microsoft.Spark.CSharp.Core.Broadcast.broadcastRegistry">
<summary>
A thread-safe static collection that is used to store registered broadcast objects.
</summary>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Broadcast.#ctor(System.String)">
<summary>
Initializes a new instance of Broadcast class with a specified path.
</summary>
<param name="path">The path to be set.</param>
</member>
<member name="T:Microsoft.Spark.CSharp.Core.Broadcast`1">
<summary>
A generic version of <see cref="T:Microsoft.Spark.CSharp.Core.Broadcast"/> where the element can be specified.
</summary>
<typeparam name="T">The type of element in Broadcast</typeparam>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Broadcast`1.Unpersist(System.Boolean)">
<summary>
Delete cached copies of this broadcast on the executors.
@ -149,6 +237,29 @@
</summary>
<typeparam name="T"></typeparam>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Option`1.#ctor">
<summary>
Initializes an instance of the Option class without any value.
</summary>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Option`1.#ctor(`0)">
<summary>
Initializes an instance of the Option class with a specific value.
</summary>
<param name="value">The value to be associated with the new instance.</param>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.Option`1.GetValue">
<summary>
Returns the value of the option if Option.IsDefined is TRUE;
otherwise, throws an <see cref="T:System.ArgumentException"/>.
</summary>
<returns></returns>
</member>
<member name="P:Microsoft.Spark.CSharp.Core.Option`1.IsDefined">
<summary>
Indicates whether the option value is defined.
</summary>
</member>
<member name="T:Microsoft.Spark.CSharp.Core.Partitioner">
<summary>
An object that defines how the elements in a key-value pair RDD are partitioned by key.
@ -173,6 +284,11 @@
Interface for collect operation on RDD
</summary>
</member>
<member name="T:Microsoft.Spark.CSharp.Core.DoubleRDDFunctions">
<summary>
Extra functions available on RDDs of Doubles through an implicit conversion.
</summary>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.DoubleRDDFunctions.Sum(Microsoft.Spark.CSharp.Core.RDD{System.Double})">
<summary>
Add up the elements in this RDD.
@ -285,6 +401,12 @@
<param name="self"></param>
<returns></returns>
</member>
<member name="T:Microsoft.Spark.CSharp.Core.OrderedRDDFunctions">
<summary>
Extra functions available on RDDs of (key, value) pairs where the key is sortable through
a function to sort the key.
</summary>
</member>
<member name="M:Microsoft.Spark.CSharp.Core.OrderedRDDFunctions.SortByKey``2(Microsoft.Spark.CSharp.Core.RDD{System.Collections.Generic.KeyValuePair{``0,``1}},System.Boolean,System.Nullable{System.Int32})">
<summary>
Sorts this RDD, which is assumed to consist of KeyValuePair pairs.
@ -4049,7 +4171,7 @@
<param name="fromOffsets">Per-topic/partition Kafka offsets defining the (inclusive) starting point of the stream.</param>
<returns>A DStream object</returns>
</member>
<member name="M:Microsoft.Spark.CSharp.Streaming.KafkaUtils.CreateDirectStreamWithRepartition(Microsoft.Spark.CSharp.Streaming.StreamingContext,System.Collections.Generic.List{System.String},System.Collections.Generic.Dictionary{System.String,System.String},System.Collections.Generic.Dictionary{System.String,System.Int64},System.Int32)">
<member name="M:Microsoft.Spark.CSharp.Streaming.KafkaUtils.CreateDirectStreamWithRepartition(Microsoft.Spark.CSharp.Streaming.StreamingContext,System.Collections.Generic.List{System.String},System.Collections.Generic.Dictionary{System.String,System.String},System.Collections.Generic.Dictionary{System.String,System.Int64},System.UInt32)">
<summary>
Create an input stream that directly pulls messages from a Kafka Broker and specific offset.
@ -4077,9 +4199,6 @@
user hint on how many kafka RDD partitions to create instead of aligning with kafka partitions,
unbalanced kafka partitions and/or under-distributed data will be redistributed evenly across
a probably larger number of RDD partitions
if numPartitions > 0, then it will try to rebalance kafkaRDD based on this user hint.
if numPartitions == 0, then will fall back to the default behavior of KafkaRDD
else, then will try to reblance KafkaRDD according to spark.streaming.kafka.maxRatePerTask.* conf
</param>
<returns>A DStream object</returns>
</member>
@ -4506,4 +4625,4 @@
<typeparam name="U"></typeparam>
</member>
</members>
</doc>
</doc>

Просмотреть файл

@ -19,7 +19,22 @@
####Methods
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue"></font></td><td>Adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>The += operator; adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>Provide a "zero value" for the type</td></tr><tr><td><font color="blue"></font></td><td>Add two values of the accumulator's data type, returning a new value;</td></tr></table>
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue"></font></td><td>Adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>The += operator; adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>Creates and returns a string representation of the current accumulator</td></tr><tr><td><font color="blue"></font></td><td>Provide a "zero value" for the type</td></tr><tr><td><font color="blue"></font></td><td>Add two values of the accumulator's data type, returning a new value;</td></tr></table>
---
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Accumulator`1</font>
####Summary
A generic version of where the element type is specified by the driver program.
The type of element in the accumulator.
####Methods
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">Add</font></td><td>Adds a term to this accumulator's value</td></tr><tr><td><font color="blue">op_Addition</font></td><td>The += operator; adds a term to this accumulator's value</td></tr><tr><td><font color="blue">ToString</font></td><td>Creates and returns a string representation of the current accumulator</td></tr></table>
---
@ -74,6 +89,21 @@
---
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Broadcast`1</font>
####Summary
A generic version of where the element can be specified.
The type of element in Broadcast
####Methods
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">Unpersist</font></td><td>Delete cached copies of this broadcast on the executors.</td></tr></table>
---
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Option`1</font>
####Summary
@ -83,6 +113,13 @@
####Methods
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">GetValue</font></td><td>Returns the value of the option if Option.IsDefined is TRUE; otherwise, throws an .</td></tr></table>
---
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Partitioner</font>
####Summary
@ -112,6 +149,35 @@
Interface for collect operation on RDD
###<font color="#68228B">Microsoft.Spark.CSharp.Core.DoubleRDDFunctions</font>
####Summary
Extra functions available on RDDs of Doubles through an implicit conversion.
####Methods
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">Sum</font></td><td>Add up the elements in this RDD. sc.Parallelize(new double[] {1.0, 2.0, 3.0}).Sum() 6.0</td></tr><tr><td><font color="blue">Stats</font></td><td>Return a object that captures the mean, variance and count of the RDD's elements in one operation.</td></tr><tr><td><font color="blue">Histogram</font></td><td>Compute a histogram using the provided buckets. The buckets are all open to the right except for the last which is closed. e.g. [1,10,20,50] means the buckets are [1,10) [10,20) [20,50], which means 1&lt;=x&lt;10, 10&lt;=x&lt;20, 20&lt;=x&lt;=50. And on the input of 1 and 50 we would have a histogram of 1,0,1. If your histogram is evenly spaced (e.g. [0, 10, 20, 30]), this can be switched from an O(log n) inseration to O(1) per element(where n = # buckets). Buckets must be sorted and not contain any duplicates, must be at least two elements. If `buckets` is a number, it will generates buckets which are evenly spaced between the minimum and maximum of the RDD. For example, if the min value is 0 and the max is 100, given buckets as 2, the resulting buckets will be [0,50) [50,100]. buckets must be at least 1 If the RDD contains infinity, NaN throws an exception If the elements in RDD do not vary (max == min) always returns a single bucket. It will return an tuple of buckets and histogram. &gt;&gt;&gt; rdd = sc.parallelize(range(51)) &gt;&gt;&gt; rdd.histogram(2) ([0, 25, 50], [25, 26]) &gt;&gt;&gt; rdd.histogram([0, 5, 25, 50]) ([0, 5, 25, 50], [5, 20, 26]) &gt;&gt;&gt; rdd.histogram([0, 15, 30, 45, 60]) # evenly spaced buckets ([0, 15, 30, 45, 60], [15, 15, 15, 6]) &gt;&gt;&gt; rdd = sc.parallelize(["ab", "ac", "b", "bd", "ef"]) &gt;&gt;&gt; rdd.histogram(("a", "b", "c")) (('a', 'b', 'c'), [2, 2])</td></tr><tr><td><font color="blue">Mean</font></td><td>Compute the mean of this RDD's elements. 
sc.Parallelize(new double[]{1, 2, 3}).Mean() 2.0</td></tr><tr><td><font color="blue">Variance</font></td><td>Compute the variance of this RDD's elements. sc.Parallelize(new double[]{1, 2, 3}).Variance() 0.666...</td></tr><tr><td><font color="blue">Stdev</font></td><td>Compute the standard deviation of this RDD's elements. sc.Parallelize(new double[]{1, 2, 3}).Stdev() 0.816...</td></tr><tr><td><font color="blue">SampleStdev</font></td><td>Compute the sample standard deviation of this RDD's elements (which corrects for bias in estimating the standard deviation by dividing by N-1 instead of N). sc.Parallelize(new double[]{1, 2, 3}).SampleStdev() 1.0</td></tr><tr><td><font color="blue">SampleVariance</font></td><td>Compute the sample variance of this RDD's elements (which corrects for bias in estimating the variance by dividing by N-1 instead of N). sc.Parallelize(new double[]{1, 2, 3}).SampleVariance() 1.0</td></tr></table>
---
###<font color="#68228B">Microsoft.Spark.CSharp.Core.OrderedRDDFunctions</font>
####Summary
Extra functions available on RDDs of (key, value) pairs where the key is sortable through
a function to sort the key.
####Methods
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">SortByKey``2</font></td><td>Sorts this RDD, which is assumed to consist of KeyValuePair pairs.</td></tr><tr><td><font color="blue">SortByKey``3</font></td><td>Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive.</td></tr><tr><td><font color="blue">repartitionAndSortWithinPartitions``2</font></td><td>Repartition the RDD according to the given partitioner and, within each resulting partition, sort records by their keys. This is more efficient than calling `repartition` and then sorting within each partition because it can push the sorting down into the shuffle machinery.</td></tr></table>
---
###<font color="#68228B">Microsoft.Spark.CSharp.Core.PairRDDFunctions</font>
####Summary