From effa0c76d9813a2a823531cf15e4f62702fd1571 Mon Sep 17 00:00:00 2001 From: jeg2 Date: Fri, 25 Jun 2010 02:59:20 +0000 Subject: [PATCH] * lib/csv.rb: Fixing a bug that prevented CSV from parsing all multi-line fields correctly. Patch by Rob Biedenham. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@28431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++++ lib/csv.rb | 10 ++++++++-- test/csv/test_csv_parsing.rb | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9c99e054ad..394f4a241c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Fri Jun 25 11:45:36 2010 James Edward Gray II + + * lib/csv.rb: Fixing a bug that prevented CSV from parsing + all multi-line fields correctly. Patch by Rob Biedenham. + Fri Jun 25 10:07:14 2010 Nobuyoshi Nakada * test/ruby/envutil.rb (Test::Unit::Assertions#assert_in_out_err): diff --git a/lib/csv.rb b/lib/csv.rb index d3a295b3a8..8685e3d658 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -198,7 +198,7 @@ require "stringio" # class CSV # The version of the installed library. - VERSION = "2.4.6".freeze + VERSION = "2.4.7".freeze # # A CSV::Row is part Array and part Hash. It retains an order for the fields @@ -1843,7 +1843,13 @@ class CSV end parts = parse.split(@col_sep, -1) - csv << nil if parts.empty? + if parts.empty? + if in_extended_col + csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop + else + csv << nil + end + end # This loop is the hot path of csv parsing. Some things may be non-dry # for a reason. Make sure to benchmark when refactoring. diff --git a/test/csv/test_csv_parsing.rb b/test/csv/test_csv_parsing.rb index e3609b7648..c0b8d83f96 100644 --- a/test/csv/test_csv_parsing.rb +++ b/test/csv/test_csv_parsing.rb @@ -115,6 +115,22 @@ class TestCSVParsing < Test::Unit::TestCase assert_equal(Array.new, CSV.parse_line("\n1,2,3\n")) end + def test_rob_edge_cases + [ [%Q{"a\nb"}, ["a\nb"]], + [%Q{"\n\n\n"}, ["\n\n\n"]], + [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]], + [%Q{,"\r\n"}, [nil,"\r\n"]], + [%Q{,"\r\n."}, [nil,"\r\n."]], + [%Q{"a\na","one newline"}, ["a\na", 'one newline']], + [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']], + [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']], + [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']], + [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]], + ].each do |edge_case| + assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) + end + end + def test_non_regex_edge_cases # An early version of the non-regex parser fails this test [ [ "foo,\"foo,bar,baz,foo\",\"foo\"",