diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs
index 3714fb18c2..4d8fb1487a 100644
--- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs
+++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs
@@ -388,7 +388,8 @@ private static DataFrame ReadCsvLinesIntoDataFrame(WrappedStreamReaderOrStringRe
                 // First pass: schema and number of rows.
                 while ((fields = parser.ReadFields()) != null)
                 {
-                    if (renameDuplicatedColumns)
+                    //Only first row contains column names
+                    if (renameDuplicatedColumns && rowline == 0)
                     {
                         var names = new Dictionary();

diff --git a/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj b/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj
index aeaea39c32..a7462d1949 100644
--- a/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj
+++ b/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj
@@ -46,7 +46,7 @@
-
+
diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs
index 4b0a2a5fd0..441c581ef6 100644
--- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs
+++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs
@@ -119,6 +119,44 @@ private static Stream GetStream(string streamData)
             return new MemoryStream(Encoding.Default.GetBytes(streamData));
         }

+        [Fact]
+        public void TestReadCsvWithHeaderCultureInfoAndColumnTypeAutoGuess()
+        {
+            // Regression test for https://github.com/dotnet/machinelearning/issues/7240
+
+            // Remember the ambient culture so the override below does not leak into other tests.
+            CultureInfo previousCulture = CultureInfo.CurrentCulture;
+            try
+            {
+                CultureInfo.CurrentCulture = CultureInfo.InvariantCulture; // or en-US
+
+                string csv =
+@"""Col1"",""Col2"",""Col3"",""Col4""
+""v1.1"",""5/7/2017"",""v3.1"",""v4.1""
+"""","""",""v3.2"",""v4.2""
+";
+
+                var dataFrame = DataFrame.LoadCsvFromString(csv, separator: ',', header: true,
+                    dataTypes: null, // guess the column types
+                    renameDuplicatedColumns: true, // try to rename the duplicated columns, if any
+                    cultureInfo: CultureInfo.InvariantCulture);
+
+                // One header row followed by two data rows of four fields each.
+                Assert.Equal(2, dataFrame.Rows.Count);
+                Assert.Equal(4, dataFrame.Columns.Count);
+
+                // Only the header row supplies column names; it has no duplicates, so nothing is renamed.
+                Assert.Equal("Col1", dataFrame.Columns[0].Name);
+                Assert.Equal("Col2", dataFrame.Columns[1].Name);
+                Assert.Equal("Col3", dataFrame.Columns[2].Name);
+                Assert.Equal("Col4", dataFrame.Columns[3].Name);
+            }
+            finally
+            {
+                CultureInfo.CurrentCulture = previousCulture;
+            }
+        }
+
         [Theory]
         [InlineData(false)]
         [InlineData(true)]