This commit is contained in:
Steve Suh 2021-04-08 14:44:37 -07:00 коммит произвёл GitHub
Родитель ea83dac5a6
Коммит 7c67ec9c7a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 72 добавлений и 2 удалений

Просмотреть файл

@ -667,6 +667,8 @@ namespace Microsoft.Spark.E2ETest.IpcTests
_df.CreateGlobalTempView("global_view");
_df.CreateOrReplaceGlobalTempView("global_view");
Assert.IsType<string[]>(_df.InputFiles().ToArray());
}
/// <summary>
@ -696,10 +698,11 @@ namespace Microsoft.Spark.E2ETest.IpcTests
}
/// <summary>
/// Test signatures for APIs introduced in Spark 3.*
/// Test signatures for APIs introduced in Spark 3.0.*.
/// </summary>
[SkipIfSparkVersionIsLessThan(Versions.V3_0_0)]
public void TestSignaturesV3_X_X()
public void TestSignaturesV3_0_X()
{
// Validate ToLocalIterator
var data = new List<GenericRow>
@ -729,5 +732,18 @@ namespace Microsoft.Spark.E2ETest.IpcTests
_df.Explain("cost");
_df.Explain("formatted");
}
/// <summary>
/// Exercises the <see cref="DataFrame"/> APIs introduced in Spark 3.1.* and checks the
/// runtime type of each value they return.
/// </summary>
[SkipIfSparkVersionIsLessThan(Versions.V3_1_0)]
public void TestSignaturesV3_1_X()
{
    // Explicitly typed locals make the declared return types part of the check.
    DataFrame unioned = _df.UnionByName(_df, true);
    Assert.IsType<DataFrame>(unioned);

    bool sameSemantics = _df.SameSemantics(_df);
    Assert.IsType<bool>(sameSemantics);

    int semanticHash = _df.SemanticHash();
    Assert.IsType<int>(semanticHash);
}
}
}

Просмотреть файл

@ -576,6 +576,18 @@ namespace Microsoft.Spark.Sql
public DataFrame UnionByName(DataFrame other)
{
    // Forward to the JVM-side "unionByName" and wrap whatever it returns.
    var unioned = _jvmObject.Invoke("unionByName", other);
    return WrapAsDataFrame(unioned);
}
/// <summary>
/// Unions the rows of this <see cref="DataFrame"/> with the rows of another
/// <see cref="DataFrame"/>, resolving columns by name, and returns the result
/// as a new <see cref="DataFrame"/>.
/// </summary>
/// <param name="other">The other DataFrame to union with</param>
/// <param name="allowMissingColumns">Allow missing columns</param>
/// <returns>DataFrame object</returns>
[Since(Versions.V3_1_0)]
public DataFrame UnionByName(DataFrame other, bool allowMissingColumns)
{
    // Both arguments are passed through unchanged to the JVM-side "unionByName".
    var unioned = _jvmObject.Invoke("unionByName", other, allowMissingColumns);
    return WrapAsDataFrame(unioned);
}
/// <summary>
/// Returns a new `DataFrame` containing rows only in both this `DataFrame`
/// and another `DataFrame`.
@ -1019,6 +1031,48 @@ namespace Microsoft.Spark.Sql
public DataStreamWriter WriteStream()
{
    // The result of the JVM "writeStream" call is the reference handed to the new writer.
    var writerReference = (JvmObjectReference)_jvmObject.Invoke("writeStream");
    return new DataStreamWriter(writerReference, this);
}
/// <summary>
/// Returns a best-effort snapshot of the files that make up this <see cref="DataFrame"/>.
/// Each constituent BaseRelation is asked for its files and the results are unioned with
/// duplicates removed. Depending on the source relations, some input files may not be
/// found.
/// </summary>
/// <returns>Files that compose this DataFrame</returns>
public IEnumerable<string> InputFiles()
{
    // The JVM "inputFiles" call comes back as a string array.
    var files = (string[])_jvmObject.Invoke("inputFiles");
    return files;
}
/// <summary>
/// Returns `true` when the logical query plans inside both <see cref="DataFrame"/>s are
/// equal and therefore return the same results.
/// </summary>
/// <remarks>
/// The equality comparison is simplified by tolerating cosmetic differences such as
/// attribute names.
///
/// This API can compare both <see cref="DataFrame"/>s very fast, but it can still return
/// `false` for <see cref="DataFrame"/>s that return the same results, for instance when
/// they come from different plans. Such false-negative semantics can be useful when
/// caching, as an example.
/// </remarks>
/// <param name="other">The other DataFrame to compare against</param>
/// <returns>
/// `true` when the logical query plans inside both <see cref="DataFrame"/>s are
/// equal and therefore return the same results.
/// </returns>
[Since(Versions.V3_1_0)]
public bool SameSemantics(DataFrame other)
{
    var comparison = _jvmObject.Invoke("sameSemantics", other);
    return (bool)comparison;
}
/// <summary>
/// Returns a hash code of the logical query plan against this <see cref="DataFrame"/>.
/// </summary>
/// <remarks>
/// Unlike the standard hash code, this hash is calculated against the query plan
/// simplified by tolerating cosmetic differences such as attribute names.
/// </remarks>
/// <returns>Hash code of the logical query plan</returns>
[Since(Versions.V3_1_0)]
public int SemanticHash()
{
    var planHash = _jvmObject.Invoke("semanticHash");
    return (int)planHash;
}
/// <summary>
/// Returns row objects based on the function (either "toPythonIterator",
/// "collectToPython", or "tailToPython").