Fix problems with DataFrame WriteCsv when quotes are present in data (#6340)

* Add DataFrame.IO tests with separators in data

* Add test where comma is in header

* Add two versions of test cases, likely going to use the helper version

* Fix separators in data

* Fix separators in header

* Clean up tests

* Fix issue with not wrapping output with newlines in quotations

* Accidental commit

* Clean up

* Clean up mini test framework a bit

* Fix WriteCsv bug when quotations are present in data.

* Accidental include

* Manually merge with main

* Delete extra line

* Accidental includes

* Use new raw string literal syntax

* Add NeedsQuotes helper

* IndexOfAny refactor

* Remove accidental include

* Add preview language tag to csproj
This commit is contained in:
Drew Kersnar 2022-10-04 16:02:08 -05:00 коммит произвёл GitHub
Родитель 632c373169
Коммит 7d764bb3a1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 108 добавлений и 34 удалений

Просмотреть файл

@ -3,7 +3,6 @@
// See the LICENSE file in the project root for more information.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
@ -499,11 +498,11 @@ namespace Microsoft.Data.Analysis
if (t == typeof(string))
{
bool needsQuotes = ((string)cell).IndexOf(separator) != -1 || ((string)cell).IndexOf('\n') != -1;
if (needsQuotes)
string stringCell = (string)cell;
if (NeedsQuotes(stringCell, separator))
{
record.Append('\"');
record.Append(cell);
record.Append(stringCell.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation
record.Append('\"');
continue;
}
@ -519,6 +518,7 @@ namespace Microsoft.Data.Analysis
}
}
}
private static void WriteHeader(StreamWriter csvFile, IReadOnlyList<string> columnNames, char separator)
{
bool firstColumn = true;
@ -533,11 +533,10 @@ namespace Microsoft.Data.Analysis
firstColumn = false;
}
bool needsQuotes = name.IndexOf(separator) != -1 || name.IndexOf('\n') != -1;
if (needsQuotes)
if (NeedsQuotes(name, separator))
{
csvFile.Write('\"');
csvFile.Write(name);
csvFile.Write(name.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation
csvFile.Write('\"');
}
else
@ -545,8 +544,12 @@ namespace Microsoft.Data.Analysis
csvFile.Write(name);
}
}
csvFile.WriteLine();
}
/// <summary>
/// Determines whether a CSV cell has to be wrapped in quotation marks when it is written.
/// </summary>
/// <param name="csvCell">The text of the cell (or header name) about to be written.</param>
/// <param name="separator">The field separator used for the file being written.</param>
/// <returns>
/// <c>true</c> if the cell contains the separator, a line feed, a carriage return,
/// or a quotation mark; otherwise <c>false</c>.
/// </returns>
private static bool NeedsQuotes(string csvCell, char separator)
{
    // AsSpan() yields an empty span for a null string, so a null cell simply reports false.
    ReadOnlySpan<char> cell = csvCell.AsSpan();

    // A bare carriage return is a line break too: left unquoted it can split the record
    // when the file is read back, so it is checked alongside '\n'.
    return cell.IndexOfAny(separator, '\n', '\"') != -1
        || cell.IndexOf('\r') != -1;
}
}
}

Просмотреть файл

@ -1064,10 +1064,12 @@ CMT,";
{
yield return new object[] // Comma Separators in Data
{
@"Name,Age,Description
Paul,34,""Paul lives in Vermont, VA.""
Victor,29,""Victor: Funny guy""
Maria,31,",
"""
Name,Age,Description
Paul,34,"Paul lives in Vermont, VA."
Victor,29,"Victor: Funny guy"
Maria,31,
""",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
@ -1085,10 +1087,12 @@ Maria,31,",
};
yield return new object[] // Colon Separators in Data
{
@"Name:Age:Description
Paul:34:""Paul lives in Vermont, VA.""
Victor:29:""Victor: Funny guy""
Maria:31:",
"""
Name:Age:Description
Paul:34:"Paul lives in Vermont, VA."
Victor:29:"Victor: Funny guy"
Maria:31:
""",
':',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
@ -1106,10 +1110,12 @@ Maria:31:",
};
yield return new object[] // Comma Separators in Header
{
@"""Na,me"",Age,Description
Paul,34,""Paul lives in Vermont, VA.""
Victor,29,""Victor: Funny guy""
Maria,31,",
"""
"Na,me",Age,Description
Paul,34,"Paul lives in Vermont, VA."
Victor,29,"Victor: Funny guy"
Maria,31,
""",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
@ -1127,11 +1133,13 @@ Maria,31,",
};
yield return new object[] // Newlines In Data
{
@"Name,Age,Description
Paul,34,""Paul lives in Vermont
VA.""
Victor,29,""Victor: Funny guy""
Maria,31,",
"""
Name,Age,Description
Paul,34,"Paul lives in Vermont
VA."
Victor,29,"Victor: Funny guy"
Maria,31,
""",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
@ -1141,8 +1149,15 @@ Maria,31,",
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, @"Paul lives in Vermont
VA." },
new object[]
{
"Paul",
34,
"""
Paul lives in Vermont
VA.
"""
},
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
@ -1150,18 +1165,73 @@ VA." },
};
yield return new object[] // Newlines In Header
{
@"""Na
me"":Age:Description
Paul:34:""Paul lives in Vermont, VA.""
Victor:29:""Victor: Funny guy""
Maria:31:",
"""
"Na
me":Age:Description
Paul:34:"Paul lives in Vermont, VA."
Victor:29:"Victor: Funny guy"
Maria:31:
""",
':',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { @"Na
me", "Age", "Description" },
new string[]
{
"""
Na
me
""",
"Age",
"Description"
},
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, "Paul lives in Vermont, VA." },
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
)
};
yield return new object[] // Quotations in Data
{
"""
Name,Age,Description
Paul,34,"Paul lives in ""Vermont VA""."
Victor,29,"Victor: Funny guy"
Maria,31,
""",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { "Name", "Age", "Description" },
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, """Paul lives in "Vermont VA".""" },
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
)
};
yield return new object[] // Quotations in Header
{
"""
Name,Age,"De""script""ion"
Paul,34,"Paul lives in Vermont, VA."
Victor,29,"Victor: Funny guy"
Maria,31,
""",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { "Name", "Age", """De"script"ion""" },
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{

Просмотреть файл

@ -1,6 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<NoWarn>$(NoWarn);MSML_ParameterLocalVarName;MSML_PrivateFieldName;MSML_ExtendBaseTestClass;MSML_GeneralName</NoWarn>
<LangVersion>preview</LangVersion>
</PropertyGroup>
<ItemGroup>