dwnichols 2016-12-13 16:24:59 -05:00
Parent 66fc5123d0
Commit a36b16009e
6 changed files with 26 additions and 26 deletions

View File

@@ -18,7 +18,7 @@ namespace Microsoft.Spark.CSharp.Core
{
/// <summary>
- /// Sorts this RDD, which is assumed to consist of KeyValuePair pairs.
+ /// Sorts this RDD, which is assumed to consist of Tuple pairs.
/// </summary>
/// <typeparam name="K"></typeparam>
/// <typeparam name="V"></typeparam>
@@ -32,7 +32,7 @@ namespace Microsoft.Spark.CSharp.Core
return SortByKey<K, V, K>(self, ascending, numPartitions, new DefaultSortKeyFuncHelper<K>().Execute);
}
/// <summary>
- /// Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive.
+ /// Sorts this RDD, which is assumed to consist of Tuples. If Item1 is type of string, case is sensitive.
/// </summary>
/// <typeparam name="K"></typeparam>
/// <typeparam name="V"></typeparam>
@@ -40,7 +40,7 @@ namespace Microsoft.Spark.CSharp.Core
/// <param name="self"></param>
/// <param name="ascending"></param>
/// <param name="numPartitions">Number of partitions. Each partition of the sorted RDD contains a sorted range of the elements.</param>
/// <param name="keyFunc">RDD will sort by keyFunc(key) for every key in KeyValuePair. Must not be null.</param>
/// <param name="keyFunc">RDD will sort by keyFunc(key) for every Item1 in Tuple. Must not be null.</param>
/// <returns></returns>
public static RDD<Tuple<K, V>> SortByKey<K, V, U>(this RDD<Tuple<K, V>> self,
bool ascending, int? numPartitions, Func<K, U> keyFunc)
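
The SortByKey overload in this hunk keeps its shape; only the pair type and the doc comments change. A minimal usage sketch, assuming a running Mobius SparkContext named sc and a small Parallelize'd input (the data and the case-insensitive keyFunc are illustrative, not part of this commit):

using System;
using Microsoft.Spark.CSharp.Core;

class SortByKeyExample
{
    // Illustrative only: `sc` is assumed to be created and configured elsewhere.
    static void Run(SparkContext sc)
    {
        // Pair RDDs are now RDD<Tuple<K, V>> rather than RDD<KeyValuePair<K, V>>.
        var pairs = sc.Parallelize(new[]
        {
            Tuple.Create("b", 2),
            Tuple.Create("A", 1),
            Tuple.Create("c", 3)
        }, 2);

        // Sort by Item1; keyFunc lets the caller normalize keys, e.g. for a case-insensitive sort.
        var sorted = pairs.SortByKey(ascending: true, numPartitions: null, keyFunc: k => k.ToLower());

        foreach (var kv in sorted.Collect())
            Console.WriteLine("{0} -> {1}", kv.Item1, kv.Item2);
    }
}
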
@@ -103,13 +103,13 @@ namespace Microsoft.Spark.CSharp.Core
/// <param name="partitionFunc"></param>
/// <param name="ascending"></param>
/// <returns></returns>
- public static RDD<KeyValuePair<K, V>> repartitionAndSortWithinPartitions<K, V>(
- this RDD<KeyValuePair<K, V>> self,
+ public static RDD<Tuple<K, V>> repartitionAndSortWithinPartitions<K, V>(
+ this RDD<Tuple<K, V>> self,
int? numPartitions = null,
Func<K, int> partitionFunc = null,
bool ascending = true)
{
- return self.MapPartitionsWithIndex<KeyValuePair<K, V>>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Key) : iter.OrderByDescending(kv => kv.Key));
+ return self.MapPartitionsWithIndex<Tuple<K, V>>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Item1) : iter.OrderByDescending(kv => kv.Item1));
}
[Serializable]
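
A corresponding sketch for repartitionAndSortWithinPartitions after the same rename (again illustrative; as the body above shows, the local implementation simply sorts each partition by Item1 via MapPartitionsWithIndex):

using System;
using Microsoft.Spark.CSharp.Core;

class RepartitionSortExample
{
    // Illustrative only: `sc` is assumed to be created elsewhere.
    static void Run(SparkContext sc)
    {
        var pairs = sc.Parallelize(new[]
        {
            Tuple.Create(3, "c"),
            Tuple.Create(1, "a"),
            Tuple.Create(2, "b")
        }, 2);

        // Each partition of the result is sorted by Item1, descending in this sketch.
        var sorted = pairs.repartitionAndSortWithinPartitions(ascending: false);

        foreach (var kv in sorted.Collect())
            Console.WriteLine("{0} -> {1}", kv.Item1, kv.Item2);
    }
}
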

View File

@@ -241,7 +241,7 @@ namespace Microsoft.Spark.CSharp.Core
///
/// Do
/// {{{
- /// RDD&lt;KeyValuePair&lt;string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
+ /// RDD&lt;Tuple&lt;string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
/// }}}
///
/// then `rdd` contains
@@ -259,9 +259,9 @@ namespace Microsoft.Spark.CSharp.Core
/// <param name="filePath"></param>
/// <param name="minPartitions"></param>
/// <returns></returns>
- public RDD<KeyValuePair<byte[], byte[]>> WholeTextFiles(string filePath, int? minPartitions = null)
+ public RDD<Tuple<byte[], byte[]>> WholeTextFiles(string filePath, int? minPartitions = null)
{
- return new RDD<KeyValuePair<byte[], byte[]>>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
+ return new RDD<Tuple<byte[], byte[]>>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
}
/// <summary>
@@ -279,7 +279,7 @@ namespace Microsoft.Spark.CSharp.Core
/// }}}
///
/// Do
- /// RDD&lt;KeyValuePair&lt;string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
+ /// RDD&lt;Tuple&lt;string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
///
/// then `rdd` contains
/// {{{
@@ -296,9 +296,9 @@ namespace Microsoft.Spark.CSharp.Core
/// <param name="filePath"></param>
/// <param name="minPartitions"></param>
/// <returns></returns>
- public RDD<KeyValuePair<byte[], byte[]>> BinaryFiles(string filePath, int? minPartitions)
+ public RDD<Tuple<byte[], byte[]>> BinaryFiles(string filePath, int? minPartitions)
{
- return new RDD<KeyValuePair<byte[], byte[]>>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
+ return new RDD<Tuple<byte[], byte[]>>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
}
/// <summary>
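
Both WholeTextFiles and BinaryFiles now surface their (path, content) pairs as Tuple<byte[], byte[]>. A hedged consumption sketch (the path is a placeholder, and decoding the byte[] members as UTF-8 is an assumption made here for display, not something this commit specifies):

using System;
using System.Text;
using Microsoft.Spark.CSharp.Core;

class WholeTextFilesExample
{
    // Illustrative only: `sc` is assumed to be created elsewhere.
    static void Run(SparkContext sc)
    {
        RDD<Tuple<byte[], byte[]>> files = sc.WholeTextFiles("hdfs://a-hdfs-path", null);

        foreach (var file in files.Collect())
        {
            // Item1 is the file path, Item2 is the file content, both as raw bytes.
            string path = Encoding.UTF8.GetString(file.Item1);
            string content = Encoding.UTF8.GetString(file.Item2);
            Console.WriteLine("{0}: {1} characters", path, content.Length);
        }
    }
}
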

View File

@@ -372,7 +372,7 @@ namespace AdapterTest
SparkContext sc = new SparkContext(sparkContextProxy.Object, null);
// Act
- RDD<KeyValuePair<byte[], byte[]>> rdd = sc.WholeTextFiles(filePath, null);
+ RDD<Tuple<byte[], byte[]>> rdd = sc.WholeTextFiles(filePath, null);
// Assert
Assert.IsNotNull(rdd);
@@ -394,7 +394,7 @@ namespace AdapterTest
SparkContext sc = new SparkContext(sparkContextProxy.Object, null);
// Act
- RDD<KeyValuePair<byte[], byte[]>> rdd = sc.BinaryFiles(filePath, null);
+ RDD<Tuple<byte[], byte[]>> rdd = sc.BinaryFiles(filePath, null);
// Assert
Assert.IsNotNull(rdd);

View File

@@ -99,8 +99,8 @@ namespace Microsoft.Spark.CSharp
{
Console.WriteLine(record);
- var countByWord = (KeyValuePair<string, int>)record;
- Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "lazy" || countByWord.Key == "dog" ? 92 : 88);
+ var countByWord = (Tuple<string, int>)record;
+ Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "lazy" || countByWord.Item1 == "dog" ? 92 : 88);
}
Console.WriteLine();
@@ -283,10 +283,10 @@ namespace Microsoft.Spark.CSharp
foreach (object record in taken)
{
- KeyValuePair<int, int> sum = (KeyValuePair<int, int>)record;
- Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value);
+ Tuple<int, int> sum = (Tuple<int, int>)record;
+ Console.WriteLine("Key: {0}, Value: {1}", sum.Item1, sum.Item2);
// when batch count reaches window size, sum of even/odd number stay at windowDuration / slideDuration * (2450, 2500) respectively
- Assert.AreEqual(sum.Value, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Key == 0 ? 2450 : 2500));
+ Assert.AreEqual(sum.Item2, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Item1 == 0 ? 2450 : 2500));
}
});
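
The consumer-side change in these samples is mechanical: records previously cast to KeyValuePair and read through .Key/.Value are now cast to Tuple and read through .Item1/.Item2. A standalone sketch of that pattern in plain .NET (the records are made up for illustration; no Spark involved):

using System;
using System.Collections.Generic;

class TupleRecordExample
{
    static void Main()
    {
        // Records arrive untyped, as in the sample's foreach loops above.
        var records = new List<object>
        {
            Tuple.Create("The", 92),
            Tuple.Create("quick", 88)
        };

        foreach (object record in records)
        {
            // Before: var countByWord = (KeyValuePair<string, int>)record; ... countByWord.Key / .Value
            var countByWord = (Tuple<string, int>)record;
            Console.WriteLine("Key: {0}, Value: {1}", countByWord.Item1, countByWord.Item2);
        }
    }
}
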

View File

@@ -533,7 +533,7 @@ namespace Microsoft.Spark.CSharp
.GetField("value", BindingFlags.NonPublic | BindingFlags.Instance)
.GetValue(item.Value);
logger.LogDebug("({0}, {1})", item.Key, value);
- formatter.Serialize(ms, new KeyValuePair<int, dynamic>(item.Key, value));
+ formatter.Serialize(ms, new Tuple<int, dynamic>(item.Key, value));
byte[] buffer = ms.ToArray();
SerDe.Write(networkStream, buffer.Length);
SerDe.Write(networkStream, buffer);
@@ -649,7 +649,7 @@ namespace Microsoft.Spark.CSharp
}
watch.Stop();
- yield return new KeyValuePair<byte[], byte[]>(pairKey, pairValue);
+ yield return new Tuple<byte[], byte[]>(pairKey, pairValue);
break;
}
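
Tuple<T1, T2> is marked [Serializable], so the accumulator update can round-trip through BinaryFormatter exactly as the KeyValuePair did. A standalone sketch of that round trip (a MemoryStream stands in for the worker's network stream; the id and value are made up):

using System;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;

class AccumulatorSerDeSketch
{
    static void Main()
    {
        var formatter = new BinaryFormatter();

        // Worker side: serialize the (accumulatorId, value) pair as a Tuple, as in the hunk above.
        var ms = new MemoryStream();
        formatter.Serialize(ms, new Tuple<int, dynamic>(1, 42));
        byte[] buffer = ms.ToArray();

        // Driver/test side: deserialize and read Item1/Item2 where Key/Value were used before.
        var pair = (Tuple<int, dynamic>)formatter.Deserialize(new MemoryStream(buffer));
        Console.WriteLine("accumulatorId: {0}, value: {1}", pair.Item1, pair.Item2);
    }
}
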

View File

@@ -573,7 +573,7 @@ namespace WorkerTest
{
WritePayloadHeaderToWorker(s);
byte[] command = SparkContext.BuildCommand(
- new CSharpWorkerFunc((pid, iter) => iter.Cast<KeyValuePair<byte[], byte[]>>().Select(pair => pair.Key)),
+ new CSharpWorkerFunc((pid, iter) => iter.Cast<Tuple<byte[], byte[]>>().Select(pair => pair.Item1)),
SerializedMode.Pair, SerializedMode.None);
SerDe.Write(s, command.Length);
@@ -713,7 +713,7 @@ namespace WorkerTest
/// <summary>
/// read accumulator
/// </summary>
- private IEnumerable<KeyValuePair<int, dynamic>> ReadAccumulator(Stream s, int expectedCount = 0)
+ private IEnumerable<Tuple<int, dynamic>> ReadAccumulator(Stream s, int expectedCount = 0)
{
int count = 0;
var formatter = new BinaryFormatter();
@@ -723,7 +723,7 @@ namespace WorkerTest
if (length > 0)
{
var ms = new MemoryStream(SerDe.ReadBytes(s, length));
- yield return (KeyValuePair<int, dynamic>)formatter.Deserialize(ms);
+ yield return (Tuple<int, dynamic>)formatter.Deserialize(ms);
if (expectedCount > 0 && ++count >= expectedCount)
{
@@ -780,8 +780,8 @@ namespace WorkerTest
int accumulatorsCount = SerDe.ReadInt(s);
Assert.IsTrue(accumulatorsCount == 1);
var accumulatorFromWorker = ReadAccumulator(s, accumulatorsCount).First();
- Assert.AreEqual(accumulatorId, accumulatorFromWorker.Key);
- Assert.AreEqual(expectedCount, accumulatorFromWorker.Value);
+ Assert.AreEqual(accumulatorId, accumulatorFromWorker.Item1);
+ Assert.AreEqual(expectedCount, accumulatorFromWorker.Item2);
SerDe.ReadInt(s);
}
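
The worker-function change in the test above follows the same pattern: the command lambda casts incoming records to Tuple<byte[], byte[]> and projects Item1 (the key bytes) instead of .Key. A plain-LINQ sketch of what that lambda does to its input, runnable without the worker harness (the records are illustrative, and IEnumerable<object> stands in for the enumerable the worker actually passes):

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

class WorkerFuncShapeSketch
{
    static void Main()
    {
        IEnumerable<object> input = new object[]
        {
            new Tuple<byte[], byte[]>(Encoding.UTF8.GetBytes("k1"), Encoding.UTF8.GetBytes("v1")),
            new Tuple<byte[], byte[]>(Encoding.UTF8.GetBytes("k2"), Encoding.UTF8.GetBytes("v2"))
        };

        // Same shape as the lambda in the test: keep only the key bytes of each pair.
        Func<int, IEnumerable<object>, IEnumerable<byte[]>> func =
            (pid, iter) => iter.Cast<Tuple<byte[], byte[]>>().Select(pair => pair.Item1);

        foreach (byte[] key in func(0, input))
            Console.WriteLine(Encoding.UTF8.GetString(key));
    }
}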