Further work on Tuple change
Parent: 66fc5123d0
Commit: a36b16009e
@@ -18,7 +18,7 @@ namespace Microsoft.Spark.CSharp.Core
     {

         /// <summary>
-        /// Sorts this RDD, which is assumed to consist of KeyValuePair pairs.
+        /// Sorts this RDD, which is assumed to consist of Tuple pairs.
         /// </summary>
         /// <typeparam name="K"></typeparam>
         /// <typeparam name="V"></typeparam>
@@ -32,7 +32,7 @@ namespace Microsoft.Spark.CSharp.Core
             return SortByKey<K, V, K>(self, ascending, numPartitions, new DefaultSortKeyFuncHelper<K>().Execute);
         }
         /// <summary>
-        /// Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive.
+        /// Sorts this RDD, which is assumed to consist of Tuples. If Item1 is type of string, case is sensitive.
         /// </summary>
         /// <typeparam name="K"></typeparam>
         /// <typeparam name="V"></typeparam>
@@ -40,7 +40,7 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="self"></param>
         /// <param name="ascending"></param>
         /// <param name="numPartitions">Number of partitions. Each partition of the sorted RDD contains a sorted range of the elements.</param>
-        /// <param name="keyFunc">RDD will sort by keyFunc(key) for every key in KeyValuePair. Must not be null.</param>
+        /// <param name="keyFunc">RDD will sort by keyFunc(key) for every Item1 in Tuple. Must not be null.</param>
         /// <returns></returns>
         public static RDD<Tuple<K, V>> SortByKey<K, V, U>(this RDD<Tuple<K, V>> self,
             bool ascending, int? numPartitions, Func<K, U> keyFunc)
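For orientation, a minimal usage sketch of the Tuple-based SortByKey overload above. It is not part of this commit; it assumes an existing SparkContext `sc` and that `Parallelize` is available as used elsewhere in the samples, and the data is purely illustrative.

    // Sketch only: sort Tuple pairs case-insensitively by applying keyFunc to Item1.
    var pairs = sc.Parallelize(new[]
    {
        new Tuple<string, int>("Banana", 2),
        new Tuple<string, int>("apple", 1),
        new Tuple<string, int>("cherry", 3)
    }, 2);

    // keyFunc is applied to Item1 of each Tuple; here it makes the string comparison case-insensitive.
    var sorted = pairs.SortByKey(ascending: true, numPartitions: 2, keyFunc: k => k.ToLowerInvariant());
    // Collect() would then yield ("apple", 1), ("Banana", 2), ("cherry", 3).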
@@ -103,13 +103,13 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="partitionFunc"></param>
         /// <param name="ascending"></param>
         /// <returns></returns>
-        public static RDD<KeyValuePair<K, V>> repartitionAndSortWithinPartitions<K, V>(
-            this RDD<KeyValuePair<K, V>> self,
+        public static RDD<Tuple<K, V>> repartitionAndSortWithinPartitions<K, V>(
+            this RDD<Tuple<K, V>> self,
             int? numPartitions = null,
             Func<K, int> partitionFunc = null,
             bool ascending = true)
         {
-            return self.MapPartitionsWithIndex<KeyValuePair<K, V>>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Key) : iter.OrderByDescending(kv => kv.Key));
+            return self.MapPartitionsWithIndex<Tuple<K, V>>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Item1) : iter.OrderByDescending(kv => kv.Item1));
         }

         [Serializable]
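As the new body above shows, repartitionAndSortWithinPartitions orders the elements of each partition by Item1; it does not impose a total order across partitions. A hedged caller-side sketch, with illustrative names and data that are not from this commit:

    // Sketch only: after the call, each partition is sorted by Item1 independently of the others.
    var rdd = sc.Parallelize(new[]
    {
        new Tuple<int, string>(3, "c"),
        new Tuple<int, string>(1, "a"),
        new Tuple<int, string>(2, "b"),
        new Tuple<int, string>(0, "z")
    }, 2);

    var partitionSorted = rdd.repartitionAndSortWithinPartitions(numPartitions: 2, ascending: true);
    // Collect() would return tuples ordered within each partition, not globally sorted.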
@@ -241,7 +241,7 @@ namespace Microsoft.Spark.CSharp.Core
         ///
         /// Do
         /// {{{
-        /// RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
+        /// RDD<Tuple<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path")
         /// }}}
         ///
         /// then `rdd` contains
@@ -259,9 +259,9 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="filePath"></param>
         /// <param name="minPartitions"></param>
         /// <returns></returns>
-        public RDD<KeyValuePair<byte[], byte[]>> WholeTextFiles(string filePath, int? minPartitions = null)
+        public RDD<Tuple<byte[], byte[]>> WholeTextFiles(string filePath, int? minPartitions = null)
         {
-            return new RDD<KeyValuePair<byte[], byte[]>>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
+            return new RDD<Tuple<byte[], byte[]>>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
         }

         /// <summary>
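A hedged sketch of reading whole text files with the Tuple-based signature above. The path is a placeholder taken from the doc comment, `sc` is assumed to be an existing SparkContext, and the UTF-8 decoding via System.Text.Encoding is purely illustrative; none of this is part of the commit.

    // Sketch only: each element is a Tuple of (file name bytes, file content bytes).
    RDD<Tuple<byte[], byte[]>> files = sc.WholeTextFiles("hdfs://a-hdfs-path");
    var fileNames = files.Map(pair => Encoding.UTF8.GetString(pair.Item1)).Collect();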
@@ -279,7 +279,7 @@ namespace Microsoft.Spark.CSharp.Core
         /// }}}
         ///
         /// Do
-        /// RDD<KeyValuePair<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
+        /// RDD<Tuple<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
         ///
         /// then `rdd` contains
         /// {{{
@@ -296,9 +296,9 @@ namespace Microsoft.Spark.CSharp.Core
         /// <param name="filePath"></param>
         /// <param name="minPartitions"></param>
         /// <returns></returns>
-        public RDD<KeyValuePair<byte[], byte[]>> BinaryFiles(string filePath, int? minPartitions)
+        public RDD<Tuple<byte[], byte[]>> BinaryFiles(string filePath, int? minPartitions)
         {
-            return new RDD<KeyValuePair<byte[], byte[]>>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
+            return new RDD<Tuple<byte[], byte[]>>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair);
         }

         /// <summary>
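Similarly, a minimal sketch for BinaryFiles under the same assumptions (illustrative path, existing `sc`; not from the commit):

    // Sketch only: Item1 is the file name, Item2 the raw content bytes.
    RDD<Tuple<byte[], byte[]>> binaries = sc.BinaryFiles("hdfs://a-hdfs-path", null);
    var contentSizes = binaries.Map(pair => pair.Item2.Length).Collect();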
@@ -372,7 +372,7 @@ namespace AdapterTest
             SparkContext sc = new SparkContext(sparkContextProxy.Object, null);

             // Act
-            RDD<KeyValuePair<byte[], byte[]>> rdd = sc.WholeTextFiles(filePath, null);
+            RDD<Tuple<byte[], byte[]>> rdd = sc.WholeTextFiles(filePath, null);

             // Assert
             Assert.IsNotNull(rdd);
@@ -394,7 +394,7 @@ namespace AdapterTest
             SparkContext sc = new SparkContext(sparkContextProxy.Object, null);

             // Act
-            RDD<KeyValuePair<byte[], byte[]>> rdd = sc.BinaryFiles(filePath, null);
+            RDD<Tuple<byte[], byte[]>> rdd = sc.BinaryFiles(filePath, null);

             // Assert
             Assert.IsNotNull(rdd);
@@ -99,8 +99,8 @@ namespace Microsoft.Spark.CSharp
             {
                 Console.WriteLine(record);

-                var countByWord = (KeyValuePair<string, int>)record;
-                Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "lazy" || countByWord.Key == "dog" ? 92 : 88);
+                var countByWord = (Tuple<string, int>)record;
+                Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "lazy" || countByWord.Item1 == "dog" ? 92 : 88);
             }
             Console.WriteLine();

@@ -283,10 +283,10 @@ namespace Microsoft.Spark.CSharp

             foreach (object record in taken)
             {
-                KeyValuePair<int, int> sum = (KeyValuePair<int, int>)record;
-                Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value);
+                Tuple<int, int> sum = (Tuple<int, int>)record;
+                Console.WriteLine("Key: {0}, Value: {1}", sum.Item1, sum.Item2);
                 // when batch count reaches window size, sum of even/odd number stay at windowDuration / slideDuration * (2450, 2500) respectively
-                Assert.AreEqual(sum.Value, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Key == 0 ? 2450 : 2500));
+                Assert.AreEqual(sum.Item2, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Item1 == 0 ? 2450 : 2500));
             }
             });

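For readers checking the constants in the assertion above: 2450 and 2500 are consistent with each batch holding the integers 0 through 99, since the even values 0 + 2 + … + 98 sum to 2450 and the odd values 1 + 3 + … + 99 sum to 2500. The assertion then multiplies that per-batch sum by the number of batches the effective window covers (window length in seconds divided by the batch interval in seconds). This reading is inferred from the constants; the diff itself does not spell it out.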
@@ -533,7 +533,7 @@ namespace Microsoft.Spark.CSharp
                     .GetField("value", BindingFlags.NonPublic | BindingFlags.Instance)
                     .GetValue(item.Value);
                 logger.LogDebug("({0}, {1})", item.Key, value);
-                formatter.Serialize(ms, new KeyValuePair<int, dynamic>(item.Key, value));
+                formatter.Serialize(ms, new Tuple<int, dynamic>(item.Key, value));
                 byte[] buffer = ms.ToArray();
                 SerDe.Write(networkStream, buffer.Length);
                 SerDe.Write(networkStream, buffer);
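The change above makes the worker serialize each accumulator update as a Tuple of (accumulator id, value) using BinaryFormatter, then write the length-prefixed bytes to the stream. A minimal round-trip sketch of that framing, assuming System, System.IO and System.Runtime.Serialization.Formatters.Binary are in scope; it uses a concrete Tuple<int, int> for illustration and is not the worker code itself:

    // Sketch only: serialize an (accumulatorId, value) pair with BinaryFormatter,
    // as the worker does before length-prefixing it onto the network stream.
    var formatter = new BinaryFormatter();
    var ms = new MemoryStream();
    formatter.Serialize(ms, new Tuple<int, int>(1, 42));   // id 1, value 42
    byte[] buffer = ms.ToArray();
    // Wire format per the surrounding code: write buffer.Length, then buffer.

    // The receiving side (see ReadAccumulator in WorkerTest below) deserializes it back:
    var roundTripped = (Tuple<int, int>)formatter.Deserialize(new MemoryStream(buffer));
    // roundTripped.Item1 == 1, roundTripped.Item2 == 42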
@@ -649,7 +649,7 @@ namespace Microsoft.Spark.CSharp
                 }
                 watch.Stop();

-                yield return new KeyValuePair<byte[], byte[]>(pairKey, pairValue);
+                yield return new Tuple<byte[], byte[]>(pairKey, pairValue);
                 break;
             }

@@ -573,7 +573,7 @@ namespace WorkerTest
             {
                 WritePayloadHeaderToWorker(s);
                 byte[] command = SparkContext.BuildCommand(
-                    new CSharpWorkerFunc((pid, iter) => iter.Cast<KeyValuePair<byte[], byte[]>>().Select(pair => pair.Key)),
+                    new CSharpWorkerFunc((pid, iter) => iter.Cast<Tuple<byte[], byte[]>>().Select(pair => pair.Item1)),
                     SerializedMode.Pair, SerializedMode.None);

                 SerDe.Write(s, command.Length);
@@ -713,7 +713,7 @@ namespace WorkerTest
         /// <summary>
         /// read accumulator
         /// </summary>
-        private IEnumerable<KeyValuePair<int, dynamic>> ReadAccumulator(Stream s, int expectedCount = 0)
+        private IEnumerable<Tuple<int, dynamic>> ReadAccumulator(Stream s, int expectedCount = 0)
         {
             int count = 0;
             var formatter = new BinaryFormatter();
@@ -723,7 +723,7 @@ namespace WorkerTest
                 if (length > 0)
                 {
                     var ms = new MemoryStream(SerDe.ReadBytes(s, length));
-                    yield return (KeyValuePair<int, dynamic>)formatter.Deserialize(ms);
+                    yield return (Tuple<int, dynamic>)formatter.Deserialize(ms);

                     if (expectedCount > 0 && ++count >= expectedCount)
                     {
@@ -780,8 +780,8 @@ namespace WorkerTest
                 int accumulatorsCount = SerDe.ReadInt(s);
                 Assert.IsTrue(accumulatorsCount == 1);
                 var accumulatorFromWorker = ReadAccumulator(s, accumulatorsCount).First();
-                Assert.AreEqual(accumulatorId, accumulatorFromWorker.Key);
-                Assert.AreEqual(expectedCount, accumulatorFromWorker.Value);
+                Assert.AreEqual(accumulatorId, accumulatorFromWorker.Item1);
+                Assert.AreEqual(expectedCount, accumulatorFromWorker.Item2);

                 SerDe.ReadInt(s);
             }