Further work on Tuple change
Parent: 66fc5123d0
Commit: a36b16009e
@@ -18,7 +18,7 @@ namespace Microsoft.Spark.CSharp.Core
     {

         /// <summary>
-        /// Sorts this RDD, which is assumed to consist of KeyValuePair pairs.
+        /// Sorts this RDD, which is assumed to consist of Tuple pairs.
         /// </summary>
         /// <typeparam name="K"></typeparam>
         /// <typeparam name="V"></typeparam>
@@ -32,7 +32,7 @@ namespace Microsoft.Spark.CSharp.Core
             return SortByKey<K, V, K>(self, ascending, numPartitions, new DefaultSortKeyFuncHelper<K>().Execute);
         }
         /// <summary>
-        /// Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive.
+        /// Sorts this RDD, which is assumed to consist of Tuples. If Item1 is type of string, case is sensitive.
         /// </summary>
         /// <typeparam name="K"></typeparam>
         /// <typeparam name="V"></typeparam>
@@ -40,7 +40,7 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="self"></param>
         /// <param name="ascending"></param>
         /// <param name="numPartitions">Number of partitions. Each partition of the sorted RDD contains a sorted range of the elements.</param>
-        /// <param name="keyFunc">RDD will sort by keyFunc(key) for every key in KeyValuePair. Must not be null.</param>
+        /// <param name="keyFunc">RDD will sort by keyFunc(key) for every Item1 in Tuple. Must not be null.</param>
         /// <returns></returns>
         public static RDD<Tuple<K, V>> SortByKey<K, V, U>(this RDD<Tuple<K, V>> self,
             bool ascending, int? numPartitions, Func<K, U> keyFunc)
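For orientation, a minimal usage sketch of the Tuple-based SortByKey overload above. It is not part of this commit; it assumes an existing SparkContext `sc` and that `Parallelize` is available as used elsewhere in the samples, and the data is purely illustrative.

    // Sketch only: sort Tuple pairs case-insensitively by applying keyFunc to Item1.
    var pairs = sc.Parallelize(new[]
    {
        new Tuple<string, int>("Banana", 2),
        new Tuple<string, int>("apple", 1),
        new Tuple<string, int>("cherry", 3)
    }, 2);

    // keyFunc is applied to Item1 of each Tuple; here it makes the string comparison case-insensitive.
    var sorted = pairs.SortByKey(ascending: true, numPartitions: 2, keyFunc: k => k.ToLowerInvariant());
    // Collect() would then yield ("apple", 1), ("Banana", 2), ("cherry", 3).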
@@ -103,13 +103,13 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="partitionFunc"></param>
         /// <param name="ascending"></param>
         /// <returns></returns>
-        public static RDD<KeyValuePair<K, V>> repartitionAndSortWithinPartitions<K, V>(
-            this RDD<KeyValuePair<K, V>> self,
+        public static RDD<Tuple<K, V>> repartitionAndSortWithinPartitions<K, V>(
+            this RDD<Tuple<K, V>> self,
             int? numPartitions = null,
             Func<K, int> partitionFunc = null,
             bool ascending = true)
         {
-            return self.MapPartitionsWithIndex<KeyValuePair<K, V>>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Key) : iter.OrderByDescending(kv => kv.Key));
+            return self.MapPartitionsWithIndex<Tuple<K, V>>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Item1) : iter.OrderByDescending(kv => kv.Item1));
         }

         [Serializable]
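As the new body above shows, repartitionAndSortWithinPartitions orders the elements of each partition by Item1; it does not impose a total order across partitions. A hedged caller-side sketch, with illustrative names and data that are not from this commit:

    // Sketch only: after the call, each partition is sorted by Item1 independently of the others.
    var rdd = sc.Parallelize(new[]
    {
        new Tuple<int, string>(3, "c"),
        new Tuple<int, string>(1, "a"),
        new Tuple<int, string>(2, "b"),
        new Tuple<int, string>(0, "z")
    }, 2);

    var partitionSorted = rdd.repartitionAndSortWithinPartitions(numPartitions: 2, ascending: true);
    // Collect() would return tuples ordered within each partition, not globally sorted.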
@@ -241,7 +241,7 @@ namespace Microsoft.Spark.CSharp.Core
         ///
         /// Do
         /// {{{
-        /// RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
+        /// RDD<Tuple<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
         /// }}}
         ///
         /// then `rdd` contains
@@ -259,9 +259,9 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="filePath"></param>
         /// <param name="minPartitions"></param>
         /// <returns></returns>
-        public RDD<KeyValuePair<byte[], byte[]>> WholeTextFiles(string filePath, int? minPartitions = null)
+        public RDD<Tuple<byte[], byte[]>> WholeTextFiles(string filePath, int? minPartitions = null)
         {
-            return new RDD<KeyValuePair<byte[], byte[]>>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
+            return new RDD<Tuple<byte[], byte[]>>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
         }

         /// <summary>
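A hedged sketch of reading whole text files with the Tuple-based signature above. The path is a placeholder taken from the doc comment, `sc` is assumed to be an existing SparkContext, and the UTF-8 decoding via System.Text.Encoding is purely illustrative; none of this is part of the commit.

    // Sketch only: each element is a Tuple of (file name bytes, file content bytes).
    RDD<Tuple<byte[], byte[]>> files = sc.WholeTextFiles("hdfs://a-hdfs-path");
    var fileNames = files.Map(pair => Encoding.UTF8.GetString(pair.Item1)).Collect();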
@@ -279,7 +279,7 @@ namespace Microsoft.Spark.CSharp.Core
         /// }}}
         ///
         /// Do
-        /// RDD<KeyValuePair<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
+        /// RDD<Tuple<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
         ///
         /// then `rdd` contains
         /// {{{
@@ -296,9 +296,9 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="filePath"></param>
         /// <param name="minPartitions"></param>
         /// <returns></returns>
-        public RDD<KeyValuePair<byte[], byte[]>> BinaryFiles(string filePath, int? minPartitions)
+        public RDD<Tuple<byte[], byte[]>> BinaryFiles(string filePath, int? minPartitions)
         {
-            return new RDD<KeyValuePair<byte[], byte[]>>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
+            return new RDD<Tuple<byte[], byte[]>>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
         }

         /// <summary>
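Similarly, a minimal sketch for BinaryFiles under the same assumptions (illustrative path, existing `sc`; not from the commit):

    // Sketch only: Item1 is the file name, Item2 the raw content bytes.
    RDD<Tuple<byte[], byte[]>> binaries = sc.BinaryFiles("hdfs://a-hdfs-path", null);
    var contentSizes = binaries.Map(pair => pair.Item2.Length).Collect();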
@@ -372,7 +372,7 @@ namespace AdapterTest
             SparkContext sc = new SparkContext(sparkContextProxy.Object, null);

             // Act
-            RDD<KeyValuePair<byte[], byte[]>> rdd = sc.WholeTextFiles(filePath, null);
+            RDD<Tuple<byte[], byte[]>> rdd = sc.WholeTextFiles(filePath, null);

             // Assert
             Assert.IsNotNull(rdd);
@@ -394,7 +394,7 @@ namespace AdapterTest
             SparkContext sc = new SparkContext(sparkContextProxy.Object, null);

             // Act
-            RDD<KeyValuePair<byte[], byte[]>> rdd = sc.BinaryFiles(filePath, null);
+            RDD<Tuple<byte[], byte[]>> rdd = sc.BinaryFiles(filePath, null);

             // Assert
             Assert.IsNotNull(rdd);
@@ -99,8 +99,8 @@ namespace Microsoft.Spark.CSharp
             {
                 Console.WriteLine(record);

-                var countByWord = (KeyValuePair<string, int>)record;
-                Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "lazy" || countByWord.Key == "dog" ? 92 : 88);
+                var countByWord = (Tuple<string, int>)record;
+                Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "lazy" || countByWord.Item1 == "dog" ? 92 : 88);
             }
             Console.WriteLine();

@@ -283,10 +283,10 @@ namespace Microsoft.Spark.CSharp

             foreach (object record in taken)
             {
-                KeyValuePair<int, int> sum = (KeyValuePair<int, int>)record;
-                Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value);
+                Tuple<int, int> sum = (Tuple<int, int>)record;
+                Console.WriteLine("Key: {0}, Value: {1}", sum.Item1, sum.Item2);
                 // when batch count reaches window size, sum of even/odd number stay at windowDuration / slideDuration * (2450, 2500) respectively
-                Assert.AreEqual(sum.Value, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Key == 0 ? 2450 : 2500));
+                Assert.AreEqual(sum.Item2, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Item1 == 0 ? 2450 : 2500));
             }
             });

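For readers checking the constants in the assertion above: 2450 and 2500 are consistent with each batch holding the integers 0 through 99, since the even values 0 + 2 + … + 98 sum to 2450 and the odd values 1 + 3 + … + 99 sum to 2500. The assertion then multiplies that per-batch sum by the number of batches the effective window covers (window length in seconds divided by the batch interval in seconds). This reading is inferred from the constants; the diff itself does not spell it out.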
@@ -533,7 +533,7 @@ namespace Microsoft.Spark.CSharp
                     .GetField("value", BindingFlags.NonPublic | BindingFlags.Instance)
                     .GetValue(item.Value);
                 logger.LogDebug("({0}, {1})", item.Key, value);
-                formatter.Serialize(ms, new KeyValuePair<int, dynamic>(item.Key, value));
+                formatter.Serialize(ms, new Tuple<int, dynamic>(item.Key, value));
                 byte[] buffer = ms.ToArray();
                 SerDe.Write(networkStream, buffer.Length);
                 SerDe.Write(networkStream, buffer);
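The change above makes the worker serialize each accumulator update as a Tuple of (accumulator id, value) using BinaryFormatter, then write the length-prefixed bytes to the stream. A minimal round-trip sketch of that framing, assuming System, System.IO and System.Runtime.Serialization.Formatters.Binary are in scope; it uses a concrete Tuple<int, int> for illustration and is not the worker code itself:

    // Sketch only: serialize an (accumulatorId, value) pair with BinaryFormatter,
    // as the worker does before length-prefixing it onto the network stream.
    var formatter = new BinaryFormatter();
    var ms = new MemoryStream();
    formatter.Serialize(ms, new Tuple<int, int>(1, 42));   // id 1, value 42
    byte[] buffer = ms.ToArray();
    // Wire format per the surrounding code: write buffer.Length, then buffer.

    // The receiving side (see ReadAccumulator in WorkerTest below) deserializes it back:
    var roundTripped = (Tuple<int, int>)formatter.Deserialize(new MemoryStream(buffer));
    // roundTripped.Item1 == 1, roundTripped.Item2 == 42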
@@ -649,7 +649,7 @@ namespace Microsoft.Spark.CSharp
                 }
                 watch.Stop();

-                yield return new KeyValuePair<byte[], byte[]>(pairKey, pairValue);
+                yield return new Tuple<byte[], byte[]>(pairKey, pairValue);
                 break;
             }

@@ -573,7 +573,7 @@ namespace WorkerTest
             {
                 WritePayloadHeaderToWorker(s);
                 byte[] command = SparkContext.BuildCommand(
-                    new CSharpWorkerFunc((pid, iter) => iter.Cast<KeyValuePair<byte[], byte[]>>().Select(pair => pair.Key)),
+                    new CSharpWorkerFunc((pid, iter) => iter.Cast<Tuple<byte[], byte[]>>().Select(pair => pair.Item1)),
                     SerializedMode.Pair, SerializedMode.None);

                 SerDe.Write(s, command.Length);
@@ -713,7 +713,7 @@ namespace WorkerTest
         /// <summary>
         /// read accumulator
         /// </summary>
-        private IEnumerable<KeyValuePair<int, dynamic>> ReadAccumulator(Stream s, int expectedCount = 0)
+        private IEnumerable<Tuple<int, dynamic>> ReadAccumulator(Stream s, int expectedCount = 0)
         {
             int count = 0;
             var formatter = new BinaryFormatter();
@@ -723,7 +723,7 @@ namespace WorkerTest
                 if (length > 0)
                 {
                     var ms = new MemoryStream(SerDe.ReadBytes(s, length));
-                    yield return (KeyValuePair<int, dynamic>)formatter.Deserialize(ms);
+                    yield return (Tuple<int, dynamic>)formatter.Deserialize(ms);

                     if (expectedCount > 0 && ++count >= expectedCount)
                     {
@@ -780,8 +780,8 @@ namespace WorkerTest
                 int accumulatorsCount = SerDe.ReadInt(s);
                 Assert.IsTrue(accumulatorsCount == 1);
                 var accumulatorFromWorker = ReadAccumulator(s, accumulatorsCount).First();
-                Assert.AreEqual(accumulatorId, accumulatorFromWorker.Key);
-                Assert.AreEqual(expectedCount, accumulatorFromWorker.Value);
+                Assert.AreEqual(accumulatorId, accumulatorFromWorker.Item1);
+                Assert.AreEqual(expectedCount, accumulatorFromWorker.Item2);

                 SerDe.ReadInt(s);
             }