Merge branch 'master' into dev
# Conflicts: # csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML
This commit is contained in:
Коммит
3d9191102c
|
@ -161,7 +161,7 @@ namespace Microsoft.Spark.CSharp.Configuration
|
|||
|
||||
/// <summary>
|
||||
/// Configuration mode for debug mode
|
||||
/// This configuration exists only to make SparkCLR development & debugging easier
|
||||
/// This configuration exists only to make SparkCLR development and debugging easier
|
||||
/// </summary>
|
||||
private class SparkCLRDebugConfiguration : SparkCLRLocalConfiguration
|
||||
{
|
||||
|
@ -212,12 +212,30 @@ namespace Microsoft.Spark.CSharp.Configuration
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// The running mode used by Configuration Service
|
||||
/// </summary>
|
||||
public enum RunMode
|
||||
{
|
||||
/// <summary>
|
||||
/// Unknown running mode
|
||||
/// </summary>
|
||||
UNKNOWN,
|
||||
DEBUG, //not a Spark mode but exists for dev debugging purpose
|
||||
/// <summary>
|
||||
/// Debug mode, not a Spark mode but exists for develop debugging purpose
|
||||
/// </summary>
|
||||
DEBUG,
|
||||
/// <summary>
|
||||
/// Indicates service is running in local
|
||||
/// </summary>
|
||||
LOCAL,
|
||||
/// <summary>
|
||||
/// Indicates service is running in cluster
|
||||
/// </summary>
|
||||
CLUSTER,
|
||||
/// <summary>
|
||||
/// Indicates service is running in Yarn
|
||||
/// </summary>
|
||||
YARN
|
||||
//MESOS //not currently supported
|
||||
}
|
||||
|
|
|
@ -38,10 +38,23 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
[ThreadStatic] // Thread safe is needed when running in C# worker
|
||||
internal static Dictionary<int, Accumulator> threadLocalAccumulatorRegistry = new Dictionary<int, Accumulator>();
|
||||
|
||||
/// <summary>
|
||||
/// The identity of the accumulator
|
||||
/// </summary>
|
||||
protected int accumulatorId;
|
||||
[NonSerialized] // When deserialized in C# worker, isDriver is false.
|
||||
|
||||
/// <summary>
|
||||
/// Indicates whether the accumulator is on driver side.
|
||||
/// When deserialized on worker side, isDriver is false by default.
|
||||
/// </summary>
|
||||
[NonSerialized]
|
||||
protected bool isDriver = false;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// A generic version of <see cref="Accumulator"/> where the element type is specified by the driver program.
|
||||
/// </summary>
|
||||
/// <typeparam name="T">The type of element in the accumulator.</typeparam>
|
||||
[Serializable]
|
||||
public class Accumulator<T> : Accumulator
|
||||
{
|
||||
|
@ -49,6 +62,11 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
internal T value;
|
||||
private readonly AccumulatorParam<T> accumulatorParam = new AccumulatorParam<T>();
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the Accumulator class with a specified identity and a value.
|
||||
/// </summary>
|
||||
/// <param name="accumulatorId">The Identity of the accumulator</param>
|
||||
/// <param name="value">The value of the accumulator</param>
|
||||
public Accumulator(int accumulatorId, T value)
|
||||
{
|
||||
this.accumulatorId = accumulatorId;
|
||||
|
@ -57,6 +75,10 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
accumulatorRegistry[accumulatorId] = this;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the value of the accumulator; only usable in driver program
|
||||
/// </summary>
|
||||
/// <exception cref="ArgumentException"></exception>
|
||||
public T Value
|
||||
{
|
||||
// Get the accumulator's value; only usable in driver program
|
||||
|
@ -115,6 +137,10 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
return self;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates and returns a string representation of the current accumulator
|
||||
/// </summary>
|
||||
/// <returns>A string representation of the current accumulator</returns>
|
||||
public override string ToString()
|
||||
{
|
||||
return string.Format("Accumulator<id={0}, value={1}>", accumulatorId, value);
|
||||
|
|
|
@ -31,6 +31,9 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
[Serializable]
|
||||
public class Broadcast
|
||||
{
|
||||
/// <summary>
|
||||
/// A thread-safe static collection that is used to store registered broadcast objects.
|
||||
/// </summary>
|
||||
[NonSerialized]
|
||||
public static ConcurrentDictionary<long, Broadcast> broadcastRegistry = new ConcurrentDictionary<long, Broadcast>();
|
||||
[NonSerialized]
|
||||
|
@ -38,6 +41,11 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
|
||||
internal long broadcastId;
|
||||
internal Broadcast() { }
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of Broadcast class with a specified path.
|
||||
/// </summary>
|
||||
/// <param name="path">The path that to be set.</param>
|
||||
public Broadcast(string path)
|
||||
{
|
||||
this.path = path;
|
||||
|
@ -60,6 +68,11 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// A generic version of <see cref="Broadcast"/> where the element can be specified.
|
||||
/// </summary>
|
||||
/// <typeparam name="T">The type of element in Broadcast</typeparam>
|
||||
[Serializable]
|
||||
public class Broadcast<T> : Broadcast
|
||||
{
|
||||
|
|
|
@ -9,6 +9,9 @@ using System.Threading.Tasks;
|
|||
|
||||
namespace Microsoft.Spark.CSharp.Core
|
||||
{
|
||||
/// <summary>
|
||||
/// Extra functions available on RDDs of Doubles through an implicit conversion.
|
||||
/// </summary>
|
||||
public static class DoubleRDDFunctions
|
||||
{
|
||||
/// <summary>
|
||||
|
|
|
@ -16,17 +16,32 @@ namespace Microsoft.Spark.CSharp.Core
|
|||
private bool isDefined = false;
|
||||
private T value;
|
||||
|
||||
/// <summary>
|
||||
/// Initialize a instance of Option class without any value.
|
||||
/// </summary>
|
||||
public Option()
|
||||
{ }
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a instance of Option class with a specific value.
|
||||
/// </summary>
|
||||
/// <param name="value">The value to be associated with the new instance.</param>
|
||||
public Option(T value)
|
||||
{
|
||||
isDefined = true;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Indicates whether the option value is defined.
|
||||
/// </summary>
|
||||
public bool IsDefined { get { return isDefined; } }
|
||||
|
||||
/// <summary>
|
||||
/// Returns the value of the option if Option.IsDefined is TRUE;
|
||||
/// otherwise, throws an <see cref="ArgumentException"/>.
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
public T GetValue()
|
||||
{
|
||||
if (isDefined) return value;
|
||||
|
|
|
@ -10,6 +10,10 @@ using System.Threading.Tasks;
|
|||
|
||||
namespace Microsoft.Spark.CSharp.Core
|
||||
{
|
||||
/// <summary>
|
||||
/// Extra functions available on RDDs of (key, value) pairs where the key is sortable through
|
||||
/// a function to sort the key.
|
||||
/// </summary>
|
||||
public static class OrderedRDDFunctions
|
||||
{
|
||||
|
||||
|
|
|
@ -50,12 +50,47 @@
|
|||
overridden here
|
||||
</summary>
|
||||
</member>
|
||||
<!-- Badly formed XML comment ignored for member "T:Microsoft.Spark.CSharp.Configuration.ConfigurationService.SparkCLRDebugConfiguration" -->
|
||||
<member name="T:Microsoft.Spark.CSharp.Configuration.ConfigurationService.SparkCLRDebugConfiguration">
|
||||
<summary>
|
||||
Configuration mode for debug mode
|
||||
This configuration exists only to make SparkCLR development and debugging easier
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Configuration.ConfigurationService.SparkCLRDebugConfiguration.GetCSharpWorkerExePath">
|
||||
<summary>
|
||||
The full path of the CSharp external backend worker process.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Configuration.RunMode">
|
||||
<summary>
|
||||
The running mode used by Configuration Service
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.UNKNOWN">
|
||||
<summary>
|
||||
Unknown running mode
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.DEBUG">
|
||||
<summary>
|
||||
Debug mode, not a Spark mode but exists for develop debugging purpose
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.LOCAL">
|
||||
<summary>
|
||||
Indicates service is running in local
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.CLUSTER">
|
||||
<summary>
|
||||
Indicates service is running in cluster
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Configuration.RunMode.YARN">
|
||||
<summary>
|
||||
Indicates service is running in Yarn
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Core.Accumulator">
|
||||
<summary>
|
||||
A shared variable that can be accumulated, i.e., has a commutative and associative "add"
|
||||
|
@ -71,6 +106,30 @@
|
|||
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Core.Accumulator.accumulatorId">
|
||||
<summary>
|
||||
The identity of the accumulator
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Core.Accumulator.isDriver">
|
||||
<summary>
|
||||
Indicates whether the accumulator is on driver side.
|
||||
When deserialized on worker side, isDriver is false by default.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Core.Accumulator`1">
|
||||
<summary>
|
||||
A generic version of <see cref="T:Microsoft.Spark.CSharp.Core.Accumulator"/> where the element type is specified by the driver program.
|
||||
</summary>
|
||||
<typeparam name="T">The type of element in the accumulator.</typeparam>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Accumulator`1.#ctor(System.Int32,`0)">
|
||||
<summary>
|
||||
Initializes a new instance of the Accumulator class with a specified identity and a value.
|
||||
</summary>
|
||||
<param name="accumulatorId">The Identity of the accumulator</param>
|
||||
<param name="value">The value of the accumulator</param>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Accumulator`1.Add(`0)">
|
||||
<summary>
|
||||
Adds a term to this accumulator's value
|
||||
|
@ -86,6 +145,18 @@
|
|||
<param name="term"></param>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Accumulator`1.ToString">
|
||||
<summary>
|
||||
Creates and returns a string representation of the current accumulator
|
||||
</summary>
|
||||
<returns>A string representation of the current accumulator</returns>
|
||||
</member>
|
||||
<member name="P:Microsoft.Spark.CSharp.Core.Accumulator`1.Value">
|
||||
<summary>
|
||||
Gets or sets the value of the accumulator; only usable in driver program
|
||||
</summary>
|
||||
<exception cref="T:System.ArgumentException"></exception>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Core.AccumulatorParam`1">
|
||||
<summary>
|
||||
An AccumulatorParam that uses the + operators to add values. Designed for simple types
|
||||
|
@ -131,6 +202,23 @@
|
|||
|
||||
</summary>
|
||||
</member>
|
||||
<member name="F:Microsoft.Spark.CSharp.Core.Broadcast.broadcastRegistry">
|
||||
<summary>
|
||||
A thread-safe static collection that is used to store registered broadcast objects.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Broadcast.#ctor(System.String)">
|
||||
<summary>
|
||||
Initializes a new instance of Broadcast class with a specified path.
|
||||
</summary>
|
||||
<param name="path">The path that to be set.</param>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Core.Broadcast`1">
|
||||
<summary>
|
||||
A generic version of <see cref="T:Microsoft.Spark.CSharp.Core.Broadcast"/> where the element can be specified.
|
||||
</summary>
|
||||
<typeparam name="T">The type of element in Broadcast</typeparam>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Broadcast`1.Unpersist(System.Boolean)">
|
||||
<summary>
|
||||
Delete cached copies of this broadcast on the executors.
|
||||
|
@ -149,6 +237,29 @@
|
|||
</summary>
|
||||
<typeparam name="T"></typeparam>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Option`1.#ctor">
|
||||
<summary>
|
||||
Initialize a instance of Option class without any value.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Option`1.#ctor(`0)">
|
||||
<summary>
|
||||
Initializes a instance of Option class with a specific value.
|
||||
</summary>
|
||||
<param name="value">The value to be associated with the new instance.</param>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.Option`1.GetValue">
|
||||
<summary>
|
||||
Returns the value of the option if Option.IsDefined is TRUE;
|
||||
otherwise, throws an <see cref="T:System.ArgumentException"/>.
|
||||
</summary>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="P:Microsoft.Spark.CSharp.Core.Option`1.IsDefined">
|
||||
<summary>
|
||||
Indicates whether the option value is defined.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Core.Partitioner">
|
||||
<summary>
|
||||
An object that defines how the elements in a key-value pair RDD are partitioned by key.
|
||||
|
@ -173,6 +284,11 @@
|
|||
Interface for collect operation on RDD
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Core.DoubleRDDFunctions">
|
||||
<summary>
|
||||
Extra functions available on RDDs of Doubles through an implicit conversion.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.DoubleRDDFunctions.Sum(Microsoft.Spark.CSharp.Core.RDD{System.Double})">
|
||||
<summary>
|
||||
Add up the elements in this RDD.
|
||||
|
@ -285,6 +401,12 @@
|
|||
<param name="self"></param>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="T:Microsoft.Spark.CSharp.Core.OrderedRDDFunctions">
|
||||
<summary>
|
||||
Extra functions available on RDDs of (key, value) pairs where the key is sortable through
|
||||
a function to sort the key.
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Core.OrderedRDDFunctions.SortByKey``2(Microsoft.Spark.CSharp.Core.RDD{System.Collections.Generic.KeyValuePair{``0,``1}},System.Boolean,System.Nullable{System.Int32})">
|
||||
<summary>
|
||||
Sorts this RDD, which is assumed to consist of KeyValuePair pairs.
|
||||
|
@ -4049,7 +4171,7 @@
|
|||
<param name="fromOffsets">Per-topic/partition Kafka offsets defining the (inclusive) starting point of the stream.</param>
|
||||
<returns>A DStream object</returns>
|
||||
</member>
|
||||
<member name="M:Microsoft.Spark.CSharp.Streaming.KafkaUtils.CreateDirectStreamWithRepartition(Microsoft.Spark.CSharp.Streaming.StreamingContext,System.Collections.Generic.List{System.String},System.Collections.Generic.Dictionary{System.String,System.String},System.Collections.Generic.Dictionary{System.String,System.Int64},System.Int32)">
|
||||
<member name="M:Microsoft.Spark.CSharp.Streaming.KafkaUtils.CreateDirectStreamWithRepartition(Microsoft.Spark.CSharp.Streaming.StreamingContext,System.Collections.Generic.List{System.String},System.Collections.Generic.Dictionary{System.String,System.String},System.Collections.Generic.Dictionary{System.String,System.Int64},System.UInt32)">
|
||||
<summary>
|
||||
Create an input stream that directly pulls messages from a Kafka Broker and specific offset.
|
||||
|
||||
|
@ -4077,9 +4199,6 @@
|
|||
user hint on how many kafka RDD partitions to create instead of aligning with kafka partitions,
|
||||
unbalanced kafka partitions and/or under-distributed data will be redistributed evenly across
|
||||
a probably larger number of RDD partitions
|
||||
if numPartitions > 0, then it will try to rebalance kafkaRDD based on this user hint.
|
||||
if numPartitions == 0, then will fall back to the default behavior of KafkaRDD
|
||||
else, then will try to reblance KafkaRDD according to spark.streaming.kafka.maxRatePerTask.* conf
|
||||
</param>
|
||||
<returns>A DStream object</returns>
|
||||
</member>
|
||||
|
@ -4506,4 +4625,4 @@
|
|||
<typeparam name="U"></typeparam>
|
||||
</member>
|
||||
</members>
|
||||
</doc>
|
||||
</doc>
|
|
@ -19,7 +19,22 @@
|
|||
|
||||
####Methods
|
||||
|
||||
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue"></font></td><td>Adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>The += operator; adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>Provide a "zero value" for the type</td></tr><tr><td><font color="blue"></font></td><td>Add two values of the accumulator's data type, returning a new value;</td></tr></table>
|
||||
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue"></font></td><td>Adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>The += operator; adds a term to this accumulator's value</td></tr><tr><td><font color="blue"></font></td><td>Creates and returns a string representation of the current accumulator</td></tr><tr><td><font color="blue"></font></td><td>Provide a "zero value" for the type</td></tr><tr><td><font color="blue"></font></td><td>Add two values of the accumulator's data type, returning a new value;</td></tr></table>
|
||||
|
||||
---
|
||||
|
||||
|
||||
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Accumulator`1</font>
|
||||
####Summary
|
||||
|
||||
|
||||
A generic version of where the element type is specified by the driver program.
|
||||
|
||||
The type of element in the accumulator.
|
||||
|
||||
####Methods
|
||||
|
||||
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">Add</font></td><td>Adds a term to this accumulator's value</td></tr><tr><td><font color="blue">op_Addition</font></td><td>The += operator; adds a term to this accumulator's value</td></tr><tr><td><font color="blue">ToString</font></td><td>Creates and returns a string representation of the current accumulator</td></tr></table>
|
||||
|
||||
---
|
||||
|
||||
|
@ -74,6 +89,21 @@
|
|||
---
|
||||
|
||||
|
||||
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Broadcast`1</font>
|
||||
####Summary
|
||||
|
||||
|
||||
A generic version of where the element can be specified.
|
||||
|
||||
The type of element in Broadcast
|
||||
|
||||
####Methods
|
||||
|
||||
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">Unpersist</font></td><td>Delete cached copies of this broadcast on the executors.</td></tr></table>
|
||||
|
||||
---
|
||||
|
||||
|
||||
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Option`1</font>
|
||||
####Summary
|
||||
|
||||
|
@ -83,6 +113,13 @@
|
|||
|
||||
|
||||
|
||||
####Methods
|
||||
|
||||
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">GetValue</font></td><td>Returns the value of the option if Option.IsDefined is TRUE; otherwise, throws an .</td></tr></table>
|
||||
|
||||
---
|
||||
|
||||
|
||||
###<font color="#68228B">Microsoft.Spark.CSharp.Core.Partitioner</font>
|
||||
####Summary
|
||||
|
||||
|
@ -112,6 +149,35 @@
|
|||
Interface for collect operation on RDD
|
||||
|
||||
|
||||
###<font color="#68228B">Microsoft.Spark.CSharp.Core.DoubleRDDFunctions</font>
|
||||
####Summary
|
||||
|
||||
|
||||
Extra functions available on RDDs of Doubles through an implicit conversion.
|
||||
|
||||
|
||||
####Methods
|
||||
|
||||
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">Sum</font></td><td>Add up the elements in this RDD. sc.Parallelize(new double[] {1.0, 2.0, 3.0}).Sum() 6.0</td></tr><tr><td><font color="blue">Stats</font></td><td>Return a object that captures the mean, variance and count of the RDD's elements in one operation.</td></tr><tr><td><font color="blue">Histogram</font></td><td>Compute a histogram using the provided buckets. The buckets are all open to the right except for the last which is closed. e.g. [1,10,20,50] means the buckets are [1,10) [10,20) [20,50], which means 1<=x<10, 10<=x<20, 20<=x<=50. And on the input of 1 and 50 we would have a histogram of 1,0,1. If your histogram is evenly spaced (e.g. [0, 10, 20, 30]), this can be switched from an O(log n) inseration to O(1) per element(where n = # buckets). Buckets must be sorted and not contain any duplicates, must be at least two elements. If `buckets` is a number, it will generates buckets which are evenly spaced between the minimum and maximum of the RDD. For example, if the min value is 0 and the max is 100, given buckets as 2, the resulting buckets will be [0,50) [50,100]. buckets must be at least 1 If the RDD contains infinity, NaN throws an exception If the elements in RDD do not vary (max == min) always returns a single bucket. It will return an tuple of buckets and histogram. >>> rdd = sc.parallelize(range(51)) >>> rdd.histogram(2) ([0, 25, 50], [25, 26]) >>> rdd.histogram([0, 5, 25, 50]) ([0, 5, 25, 50], [5, 20, 26]) >>> rdd.histogram([0, 15, 30, 45, 60]) # evenly spaced buckets ([0, 15, 30, 45, 60], [15, 15, 15, 6]) >>> rdd = sc.parallelize(["ab", "ac", "b", "bd", "ef"]) >>> rdd.histogram(("a", "b", "c")) (('a', 'b', 'c'), [2, 2])</td></tr><tr><td><font color="blue">Mean</font></td><td>Compute the mean of this RDD's elements. sc.Parallelize(new double[]{1, 2, 3}).Mean() 2.0</td></tr><tr><td><font color="blue">Variance</font></td><td>Compute the variance of this RDD's elements. sc.Parallelize(new double[]{1, 2, 3}).Variance() 0.666...</td></tr><tr><td><font color="blue">Stdev</font></td><td>Compute the standard deviation of this RDD's elements. sc.Parallelize(new double[]{1, 2, 3}).Stdev() 0.816...</td></tr><tr><td><font color="blue">SampleStdev</font></td><td>Compute the sample standard deviation of this RDD's elements (which corrects for bias in estimating the standard deviation by dividing by N-1 instead of N). sc.Parallelize(new double[]{1, 2, 3}).SampleStdev() 1.0</td></tr><tr><td><font color="blue">SampleVariance</font></td><td>Compute the sample variance of this RDD's elements (which corrects for bias in estimating the variance by dividing by N-1 instead of N). sc.Parallelize(new double[]{1, 2, 3}).SampleVariance() 1.0</td></tr></table>
|
||||
|
||||
---
|
||||
|
||||
|
||||
###<font color="#68228B">Microsoft.Spark.CSharp.Core.OrderedRDDFunctions</font>
|
||||
####Summary
|
||||
|
||||
|
||||
Extra functions available on RDDs of (key, value) pairs where the key is sortable through
|
||||
a function to sort the key.
|
||||
|
||||
|
||||
####Methods
|
||||
|
||||
<table><tr><th>Name</th><th>Description</th></tr><tr><td><font color="blue">SortByKey``2</font></td><td>Sorts this RDD, which is assumed to consist of KeyValuePair pairs.</td></tr><tr><td><font color="blue">SortByKey``3</font></td><td>Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive.</td></tr><tr><td><font color="blue">repartitionAndSortWithinPartitions``2</font></td><td>Repartition the RDD according to the given partitioner and, within each resulting partition, sort records by their keys. This is more efficient than calling `repartition` and then sorting within each partition because it can push the sorting down into the shuffle machinery.</td></tr></table>
|
||||
|
||||
---
|
||||
|
||||
|
||||
###<font color="#68228B">Microsoft.Spark.CSharp.Core.PairRDDFunctions</font>
|
||||
####Summary
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче