зеркало из https://github.com/dotnet/spark.git
Spark 3.1.0 APIs - DataFrame (#888)
This commit is contained in:
Родитель
ea83dac5a6
Коммит
7c67ec9c7a
|
@ -667,6 +667,8 @@ namespace Microsoft.Spark.E2ETest.IpcTests
|
|||
|
||||
_df.CreateGlobalTempView("global_view");
|
||||
_df.CreateOrReplaceGlobalTempView("global_view");
|
||||
|
||||
Assert.IsType<string[]>(_df.InputFiles().ToArray());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -696,10 +698,11 @@ namespace Microsoft.Spark.E2ETest.IpcTests
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Test signatures for APIs introduced in Spark 3.*
|
||||
/// Test signatures for APIs introduced in Spark 3.0.*.
|
||||
|
||||
/// </summary>
|
||||
[SkipIfSparkVersionIsLessThan(Versions.V3_0_0)]
|
||||
public void TestSignaturesV3_X_X()
|
||||
public void TestSignaturesV3_0_X()
|
||||
{
|
||||
// Validate ToLocalIterator
|
||||
var data = new List<GenericRow>
|
||||
|
@ -729,5 +732,18 @@ namespace Microsoft.Spark.E2ETest.IpcTests
|
|||
_df.Explain("cost");
|
||||
_df.Explain("formatted");
|
||||
}
|
||||
|
||||
/// <summary>
/// Test signatures for APIs introduced in Spark 3.1.*.
/// </summary>
[SkipIfSparkVersionIsLessThan(Versions.V3_1_0)]
public void TestSignaturesV3_1_X()
{
    // UnionByName overload that takes the allowMissingColumns flag.
    var unioned = _df.UnionByName(_df, true);
    Assert.IsType<DataFrame>(unioned);

    // Semantic comparison of the DataFrame against itself.
    var sameSemantics = _df.SameSemantics(_df);
    Assert.IsType<bool>(sameSemantics);

    // Semantic hash of the DataFrame's logical query plan.
    var semanticHash = _df.SemanticHash();
    Assert.IsType<int>(semanticHash);
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -576,6 +576,18 @@ namespace Microsoft.Spark.Sql
|
|||
public DataFrame UnionByName(DataFrame other) =>
|
||||
WrapAsDataFrame(_jvmObject.Invoke("unionByName", other));
|
||||
|
||||
/// <summary>
/// Returns a new <see cref="DataFrame"/> containing the union of rows in this
/// <see cref="DataFrame"/> and another <see cref="DataFrame"/>, resolving
/// columns by name.
/// </summary>
/// <param name="other">Other DataFrame</param>
/// <param name="allowMissingColumns">
/// Allow missing columns; forwarded as-is to the JVM <c>unionByName</c> call.
/// </param>
/// <returns>DataFrame object</returns>
[Since(Versions.V3_1_0)]
public DataFrame UnionByName(DataFrame other, bool allowMissingColumns)
{
    var unioned = _jvmObject.Invoke("unionByName", other, allowMissingColumns);
    return WrapAsDataFrame(unioned);
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns a new `DataFrame` containing rows only in both this `DataFrame`
|
||||
/// and another `DataFrame`.
|
||||
|
@ -1019,6 +1031,48 @@ namespace Microsoft.Spark.Sql
|
|||
public DataStreamWriter WriteStream() =>
|
||||
new DataStreamWriter((JvmObjectReference)_jvmObject.Invoke("writeStream"), this);
|
||||
|
||||
/// <summary>
/// Returns a best-effort snapshot of the files that compose this <see cref="DataFrame"/>.
/// This method simply asks each constituent BaseRelation for its respective files and
/// takes the union of all results. Depending on the source relations, this may not find
/// all input files. Duplicates are removed.
/// </summary>
/// <returns>Files that compose this DataFrame</returns>
public IEnumerable<string> InputFiles()
{
    // The JVM call result is cast to a string array and exposed as IEnumerable<string>.
    var files = (string[])_jvmObject.Invoke("inputFiles");
    return files;
}
|
||||
|
||||
/// <summary>
/// Returns <c>true</c> when the logical query plans inside both <see cref="DataFrame"/>s
/// are equal and therefore return the same results.
/// </summary>
/// <remarks>
/// The equality comparison here is simplified by tolerating cosmetic differences
/// such as attribute names.
///
/// This API can compare both <see cref="DataFrame"/>s very fast, but it can still return
/// <c>false</c> for <see cref="DataFrame"/>s that return the same results, for instance,
/// from different plans. Such false-negative semantics can be useful when caching, for
/// example.
/// </remarks>
/// <param name="other">Other DataFrame</param>
/// <returns>
/// <c>true</c> when the logical query plans inside both <see cref="DataFrame"/>s are
/// equal and therefore return the same results.
/// </returns>
[Since(Versions.V3_1_0)]
public bool SameSemantics(DataFrame other)
{
    var result = _jvmObject.Invoke("sameSemantics", other);
    return (bool)result;
}
|
||||
|
||||
/// <summary>
/// Returns a hash code of the logical query plan against this <see cref="DataFrame"/>.
/// </summary>
/// <remarks>
/// Unlike the standard hash code, the hash is calculated against the query plan
/// simplified by tolerating cosmetic differences such as attribute names.
/// </remarks>
/// <returns>Hash code of the logical query plan</returns>
[Since(Versions.V3_1_0)]
public int SemanticHash()
{
    var hash = _jvmObject.Invoke("semanticHash");
    return (int)hash;
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns row objects based on the function (either "toPythonIterator",
|
||||
/// "collectToPython", or "tailToPython").
|
||||
|
|
Загрузка…
Ссылка в новой задаче