Mirror of https://github.com/dotnet/spark.git
Spark 3.1.0 APIs - Functions (#890)
This commit is contained in:
Parent
7c67ec9c7a
Commit
8bd763ce53
|
@ -846,5 +846,37 @@ namespace Microsoft.Spark.E2ETest.IpcTests
|
|||
Assert.IsType<Column>(Bucket(Lit(1), col));
|
||||
Assert.IsType<Column>(Bucket(1, col));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Test signatures for APIs introduced in Spark 3.1.*.
|
||||
/// </summary>
|
||||
[SkipIfSparkVersionIsLessThan(Versions.V3_1_0)]
|
||||
public void TestSignaturesV3_1_X()
|
||||
{
|
||||
Column col = Column("col");
|
||||
|
||||
Assert.IsType<Column>(PercentileApprox(col, col, col));
|
||||
|
||||
Assert.IsType<Column>(NthValue(col, 0));
|
||||
Assert.IsType<Column>(NthValue(col, 0, true));
|
||||
|
||||
Assert.IsType<Column>(Acosh(col));
|
||||
Assert.IsType<Column>(Acosh("col"));
|
||||
|
||||
Assert.IsType<Column>(Asinh(col));
|
||||
Assert.IsType<Column>(Asinh("col"));
|
||||
|
||||
Assert.IsType<Column>(Atanh(col));
|
||||
Assert.IsType<Column>(Atanh("col"));
|
||||
|
||||
Assert.IsType<Column>(AssertTrue(col));
|
||||
Assert.IsType<Column>(AssertTrue(col, col));
|
||||
|
||||
Assert.IsType<Column>(RaiseError(col));
|
||||
|
||||
Assert.IsType<Column>(TimestampSeconds(col));
|
||||
|
||||
Assert.IsType<Column>(Slice(col, col, col));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -531,6 +531,30 @@ namespace Microsoft.Spark.Sql
|
|||
return ApplyFunction("min", columnName);
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the approximate `percentile` of the numeric column `col`: the smallest value
/// in the ordered `col` values (sorted from least to greatest) such that no more than
/// `percentage` of the `col` values is less than the value or equal to that value.
/// </summary>
/// <param name="column">Column to apply</param>
/// <param name="percentage">
/// A single floating point value between 0.0 and 1.0, or an array whose elements are
/// each between 0.0 and 1.0. Given an array, the result is the approximate percentile
/// array of the column at the given percentage array.
/// </param>
/// <param name="accuracy">
/// Positive numeric literal that trades memory for precision: a higher value yields
/// better accuracy, and 1.0/accuracy is the relative error of the approximation.
/// </param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column PercentileApprox(Column column, Column percentage, Column accuracy) =>
    ApplyFunction("percentile_approx", column, percentage, accuracy);
|
||||
|
||||
/// <summary>
|
||||
/// Returns the skewness of the values in a group.
|
||||
/// </summary>
|
||||
|
@ -652,7 +676,7 @@ namespace Microsoft.Spark.Sql
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Alias for VarSamp().
|
||||
/// Alias for <see cref="VarSamp(Sql.Column)"/>.
|
||||
/// </summary>
|
||||
/// <param name="column">Column to apply</param>
|
||||
/// <returns>Column object</returns>
|
||||
|
@ -662,7 +686,7 @@ namespace Microsoft.Spark.Sql
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Alias for VarSamp().
|
||||
/// Alias for <see cref="VarSamp(string)"/>.
|
||||
/// </summary>
|
||||
/// <param name="columnName">Column name</param>
|
||||
/// <returns>Column object</returns>
|
||||
|
@ -852,6 +876,25 @@ namespace Microsoft.Spark.Sql
|
|||
ApplyFunction("lead", columnName, offset);
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the value that is the `offset`th row of the window frame (counting from 1),
/// or `null` when the window frame holds fewer than `offset` rows.
///
/// When ignoreNulls is set to true, the `offset`th non-null value seen is returned
/// instead; if all values are null, then null is returned.
///
/// This is equivalent to the nth_value function in SQL.
/// </summary>
/// <param name="column">Column to apply</param>
/// <param name="offset">Offset from the current row</param>
/// <param name="ignoreNulls">To ignore null or not</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column NthValue(Column column, int offset, bool ignoreNulls = false) =>
    ApplyFunction("nth_value", column, offset, ignoreNulls);
|
||||
|
||||
/// <summary>
|
||||
/// Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered
|
||||
/// window partition. For example, if `n` is 4, the first quarter of the rows will get
|
||||
|
@ -1214,6 +1257,28 @@ namespace Microsoft.Spark.Sql
|
|||
return ApplyFunction("acos", columnName);
|
||||
}
|
||||
|
||||
/// <summary>
/// Inverse hyperbolic cosine of <paramref name="column"/>.
/// </summary>
/// <param name="column">Column to apply</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column Acosh(Column column) =>
    ApplyFunction("acosh", column);
|
||||
|
||||
/// <summary>
/// Inverse hyperbolic cosine of the column named <paramref name="columnName"/>.
/// </summary>
/// <param name="columnName">Column name</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column Acosh(string columnName) =>
    ApplyFunction("acosh", columnName);
|
||||
|
||||
/// <summary>
|
||||
/// Inverse sine of `column` in radians, as if computed by `java.lang.Math.asin`.
|
||||
/// </summary>
|
||||
|
@ -1234,6 +1299,28 @@ namespace Microsoft.Spark.Sql
|
|||
return ApplyFunction("asin", columnName);
|
||||
}
|
||||
|
||||
/// <summary>
/// Inverse hyperbolic sine of <paramref name="column"/>.
/// </summary>
/// <param name="column">Column to apply</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column Asinh(Column column) =>
    ApplyFunction("asinh", column);
|
||||
|
||||
/// <summary>
/// Inverse hyperbolic sine of the column named <paramref name="columnName"/>.
/// </summary>
/// <param name="columnName">Column name</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column Asinh(string columnName) =>
    ApplyFunction("asinh", columnName);
|
||||
|
||||
/// <summary>
|
||||
/// Inverse tangent of `column` in radians, as if computed by `java.lang.Math.atan`.
|
||||
/// </summary>
|
||||
|
@ -1342,6 +1429,28 @@ namespace Microsoft.Spark.Sql
|
|||
return ApplyFunction("atan2", yValue, xName);
|
||||
}
|
||||
|
||||
/// <summary>
/// Inverse hyperbolic tangent of <paramref name="column"/>.
/// </summary>
/// <param name="column">Column to apply</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column Atanh(Column column) =>
    ApplyFunction("atanh", column);
|
||||
|
||||
/// <summary>
/// Inverse hyperbolic tangent of the column named <paramref name="columnName"/>.
/// </summary>
/// <param name="columnName">Column name</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column Atanh(string columnName) =>
    ApplyFunction("atanh", columnName);
|
||||
|
||||
/// <summary>
|
||||
/// An expression that returns the string representation of the binary value
|
||||
/// of the given long column. For example, bin("12") returns "1100".
|
||||
|
@ -2197,6 +2306,40 @@ namespace Microsoft.Spark.Sql
|
|||
return ApplyFunction("xxhash64", (object)columns);
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns null if the condition is true, and throws an exception otherwise.
/// </summary>
/// <param name="column">Column to apply</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column AssertTrue(Column column) =>
    ApplyFunction("assert_true", column);
|
||||
|
||||
/// <summary>
/// Returns null if the condition is true; throws an exception with the given
/// error message otherwise.
/// </summary>
/// <param name="column">Column to apply</param>
/// <param name="errMsg">Error message</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column AssertTrue(Column column, Column errMsg) =>
    ApplyFunction("assert_true", column, errMsg);
|
||||
|
||||
/// <summary>
/// Throws an exception with the provided error message.
/// </summary>
/// <param name="errMsg">Error message</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column RaiseError(Column errMsg) =>
    ApplyFunction("raise_error", errMsg);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// String functions
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -3192,6 +3335,17 @@ namespace Microsoft.Spark.Sql
|
|||
return ApplyFunction("window", column, windowDuration);
|
||||
}
|
||||
|
||||
/// <summary>
/// Creates a timestamp from the number of seconds since the UTC epoch.
/// </summary>
/// <param name="column">Column to apply</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column TimestampSeconds(Column column) =>
    ApplyFunction("timestamp_seconds", column);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Collection functions
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -3236,6 +3390,20 @@ namespace Microsoft.Spark.Sql
|
|||
return ApplyFunction("slice", column, start, length);
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns an array containing all the elements in `column` from index `start`
/// (or starting from the end if `start` is negative) with the specified `length`.
/// </summary>
/// <param name="column">Column to apply</param>
/// <param name="start">Start position in the array</param>
/// <param name="length">Length for slicing</param>
/// <returns>Column object</returns>
[Since(Versions.V3_1_0)]
public static Column Slice(Column column, Column start, Column length) =>
    ApplyFunction("slice", column, start, length);
|
||||
|
||||
/// <summary>
|
||||
/// Concatenates the elements of `column` using the `delimiter`.
|
||||
/// Null values are replaced with `nullReplacement`.
|
||||
|
|
Loading…
Link in new issue