Append dataframe rows based on column names (#6808)

* Append dataframe rows based on column names

* Update DataFrame.cs

---------

Co-authored-by: Michael Sharp <51342856+michaelgsharp@users.noreply.github.com>
This commit is contained in:
Aleksei Smirnov 2023-09-02 07:06:55 +03:00 коммит произвёл GitHub
Родитель d9dbf99d97
Коммит d6927515d8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 48 добавлений и 5 удалений

Просмотреть файл

@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
@ -483,6 +483,7 @@ namespace Microsoft.Data.Analysis
/// </summary>
/// <remarks>If an input column's value doesn't match a DataFrameColumn's data type, a conversion will be attempted</remarks>
/// <remarks>If a <seealso cref="DataFrameRow"/> in <paramref name="rows"/> is null, a null value is appended to each column</remarks>
/// <remarks> Values are appended based on the column names</remarks>
/// <param name="rows">The rows to be appended to this DataFrame </param>
/// <param name="inPlace">If set, appends <paramref name="rows"/> in place. Otherwise, a new DataFrame is returned with the <paramref name="rows"/> appended</param>
/// <param name="cultureInfo">culture info for formatting values</param>
@ -491,7 +492,7 @@ namespace Microsoft.Data.Analysis
DataFrame ret = inPlace ? this : Clone();
foreach (DataFrameRow row in rows)
{
ret.Append(row, inPlace: true, cultureInfo: cultureInfo);
ret.Append(row.GetValues(), inPlace: true, cultureInfo: cultureInfo);
}
return ret;
}
@ -503,7 +504,7 @@ namespace Microsoft.Data.Analysis
/// <remarks>If <paramref name="row"/> is null, a null value is appended to each column</remarks>
/// <param name="row"></param>
/// <param name="inPlace">If set, appends a <paramref name="row"/> in place. Otherwise, a new DataFrame is returned with an appended <paramref name="row"/> </param>
/// <param name="cultureInfo">culture info for formatting values</param>
/// <param name="cultureInfo">Culture info for formatting values</param>
public DataFrame Append(IEnumerable<object> row = null, bool inPlace = false, CultureInfo cultureInfo = null)
{
if (cultureInfo == null)
@ -586,8 +587,14 @@ namespace Microsoft.Data.Analysis
/// <remarks>If a column's value doesn't match its column's data type, a conversion will be attempted</remarks>
/// <param name="row">An enumeration of column name and value to be appended</param>
/// <param name="inPlace">If set, appends <paramref name="row"/> in place. Otherwise, a new DataFrame is returned with an appended <paramref name="row"/> </param>
public DataFrame Append(IEnumerable<KeyValuePair<string, object>> row, bool inPlace = false)
/// <param name="cultureInfo">Culture info for formatting values</param>
public DataFrame Append(IEnumerable<KeyValuePair<string, object>> row, bool inPlace = false, CultureInfo cultureInfo = null)
{
if (cultureInfo == null)
{
cultureInfo = CultureInfo.CurrentCulture;
}
DataFrame ret = inPlace ? this : Clone();
if (row == null)
{
@ -608,7 +615,7 @@ namespace Microsoft.Data.Analysis
object value = columnAndValue.Value;
if (value != null)
{
value = Convert.ChangeType(value, column.DataType);
value = Convert.ChangeType(value, column.DataType, cultureInfo);
if (value is null)
{
throw new ArgumentException(string.Format(Strings.MismatchedValueType, column.DataType), column.Name);

Просмотреть файл

@ -6,6 +6,7 @@ using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
namespace Microsoft.Data.Analysis
@ -17,6 +18,7 @@ namespace Microsoft.Data.Analysis
{
private readonly DataFrame _dataFrame;
private readonly long _rowIndex;
internal DataFrameRow(DataFrame df, long rowIndex)
{
Debug.Assert(rowIndex < df.Columns.RowCount);
@ -35,6 +37,11 @@ namespace Microsoft.Data.Analysis
}
}
/// <summary>
/// Returns the values of this row paired with the names of the columns they belong to.
/// </summary>
/// <remarks>
/// One pair is produced per column of the owning <see cref="DataFrame"/>, in the order the
/// columns are enumerated. The result is a LINQ projection over the columns, so each value
/// is read from its column when the sequence is enumerated.
/// </remarks>
/// <returns>A sequence of (column name, value at this row's index) pairs.</returns>
public IEnumerable<KeyValuePair<string, object>> GetValues()
{
    return from column in _dataFrame.Columns
           select new KeyValuePair<string, object>(column.Name, column[_rowIndex]);
}
/// <summary>
/// An indexer to return the value at <paramref name="index"/>.
/// </summary>

Просмотреть файл

@ -3206,6 +3206,35 @@ namespace Microsoft.Data.Analysis.Tests
Verify(df, dfClone, df2);
}
[Fact]
public void TestAppendRowsIfColumnAreOutOfOrder()
{
    // Arrange: the target frame declares its columns in A, B, C order.
    var target = new DataFrame(
        new StringDataFrameColumn("ColumnA", new string[] { "a", "b", "c" }),
        new Int32DataFrameColumn("ColumnB", new int[] { 1, 2, 3 }),
        new Int32DataFrameColumn("ColumnC", new int[] { 10, 20, 30 }));

    // The source frame has the same columns, but ColumnC and ColumnB are swapped.
    var source = new DataFrame(
        new StringDataFrameColumn("ColumnA", new string[] { "d", "e", "f" }),
        new Int32DataFrameColumn("ColumnC", new int[] { 40, 50, 60 }),
        new Int32DataFrameColumn("ColumnB", new int[] { 4, 5, 6 }));

    // Act: append every row of the source frame to a copy of the target frame.
    DataFrame appended = target.Append(source.Rows);

    // Assert: the shape is three columns by six rows.
    Assert.Equal(3, appended.Columns.Count);
    Assert.Equal(6, appended.Rows.Count);

    // Assert: for each column, the last original row (index 2) and the first
    // appended row (index 3) hold the values that belong to that column name.
    Assert.Equal("c", appended["ColumnA"][2]);
    Assert.Equal("d", appended["ColumnA"][3]);

    Assert.Equal(3, appended["ColumnB"][2]);
    Assert.Equal(4, appended["ColumnB"][3]);

    Assert.Equal(30, appended["ColumnC"][2]);
    Assert.Equal(40, appended["ColumnC"][3]);
}
[Fact]
public void TestAppendRow()
{