From d6927515d8014442d35ee22817ec7c40b9e7eec7 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Sat, 2 Sep 2023 07:06:55 +0300 Subject: [PATCH] Append dataframe rows based on column names (#6808) * Append dataframe rows based on column names * Update DataFrame.cs --------- Co-authored-by: Michael Sharp <51342856+michaelgsharp@users.noreply.github.com> --- src/Microsoft.Data.Analysis/DataFrame.cs | 17 +++++++---- src/Microsoft.Data.Analysis/DataFrameRow.cs | 7 +++++ .../DataFrameTests.cs | 29 +++++++++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.cs b/src/Microsoft.Data.Analysis/DataFrame.cs index ad02200e6..6b4083794 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -483,6 +483,7 @@ namespace Microsoft.Data.Analysis /// /// If an input column's value doesn't match a DataFrameColumn's data type, a conversion will be attempted /// If a in is null, a null value is appended to each column + /// Values are appended based on the column names /// The rows to be appended to this DataFrame /// If set, appends in place. Otherwise, a new DataFrame is returned with the appended /// culture info for formatting values @@ -491,7 +492,7 @@ namespace Microsoft.Data.Analysis DataFrame ret = inPlace ? this : Clone(); foreach (DataFrameRow row in rows) { - ret.Append(row, inPlace: true, cultureInfo: cultureInfo); + ret.Append(row.GetValues(), inPlace: true, cultureInfo: cultureInfo); } return ret; } @@ -503,7 +504,7 @@ namespace Microsoft.Data.Analysis /// If is null, a null value is appended to each column /// /// If set, appends a in place. 
Otherwise, a new DataFrame is returned with an appended - /// culture info for formatting values + /// Culture info for formatting values public DataFrame Append(IEnumerable row = null, bool inPlace = false, CultureInfo cultureInfo = null) { if (cultureInfo == null) @@ -586,8 +587,14 @@ namespace Microsoft.Data.Analysis /// If a column's value doesn't match its column's data type, a conversion will be attempted /// An enumeration of column name and value to be appended /// If set, appends in place. Otherwise, a new DataFrame is returned with an appended - public DataFrame Append(IEnumerable> row, bool inPlace = false) + /// Culture info for formatting values + public DataFrame Append(IEnumerable> row, bool inPlace = false, CultureInfo cultureInfo = null) { + if (cultureInfo == null) + { + cultureInfo = CultureInfo.CurrentCulture; + } + DataFrame ret = inPlace ? this : Clone(); if (row == null) { @@ -608,7 +615,7 @@ namespace Microsoft.Data.Analysis object value = columnAndValue.Value; if (value != null) { - value = Convert.ChangeType(value, column.DataType); + value = Convert.ChangeType(value, column.DataType, cultureInfo); if (value is null) { throw new ArgumentException(string.Format(Strings.MismatchedValueType, column.DataType), column.Name); diff --git a/src/Microsoft.Data.Analysis/DataFrameRow.cs b/src/Microsoft.Data.Analysis/DataFrameRow.cs index 0d21fb0ba..748477b74 100644 --- a/src/Microsoft.Data.Analysis/DataFrameRow.cs +++ b/src/Microsoft.Data.Analysis/DataFrameRow.cs @@ -6,6 +6,7 @@ using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using System.Text; namespace Microsoft.Data.Analysis @@ -17,6 +18,7 @@ namespace Microsoft.Data.Analysis { private readonly DataFrame _dataFrame; private readonly long _rowIndex; + internal DataFrameRow(DataFrame df, long rowIndex) { Debug.Assert(rowIndex < df.Columns.RowCount); @@ -35,6 +37,11 @@ namespace Microsoft.Data.Analysis } } + public IEnumerable> 
GetValues() + { + return _dataFrame.Columns.Select(col => new KeyValuePair(col.Name, col[_rowIndex])); + } + /// /// An indexer to return the value at . /// diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index 423a6ef1b..b02319e47 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -3206,6 +3206,35 @@ namespace Microsoft.Data.Analysis.Tests Verify(df, dfClone, df2); } + [Fact] + public void TestAppendRowsIfColumnAreOutOfOrder() + { + var dataFrame = new DataFrame( + new StringDataFrameColumn("ColumnA", new string[] { "a", "b", "c" }), + new Int32DataFrameColumn("ColumnB", new int[] { 1, 2, 3 }), + new Int32DataFrameColumn("ColumnC", new int[] { 10, 20, 30 })); + + //ColumnC and ColumnB are swapped + var dataFrame2 = new DataFrame( + new StringDataFrameColumn("ColumnA", new string[] { "d", "e", "f" }), + new Int32DataFrameColumn("ColumnC", new int[] { 40, 50, 60 }), + new Int32DataFrameColumn("ColumnB", new int[] { 4, 5, 6 })); + + var resultDataFrame = dataFrame.Append(dataFrame2.Rows); + + Assert.Equal(3, resultDataFrame.Columns.Count); + Assert.Equal(6, resultDataFrame.Rows.Count); + + Assert.Equal("c", resultDataFrame["ColumnA"][2]); + Assert.Equal("d", resultDataFrame["ColumnA"][3]); + + Assert.Equal(3, resultDataFrame["ColumnB"][2]); + Assert.Equal(4, resultDataFrame["ColumnB"][3]); + + Assert.Equal(30, resultDataFrame["ColumnC"][2]); + Assert.Equal(40, resultDataFrame["ColumnC"][3]); + } + [Fact] public void TestAppendRow() {