зеркало из https://github.com/github/ruby.git
[ruby/csv] Revert "parser: fix a keep bug that some texts may be dropped unexpectedly"
This reverts commit https://github.com/ruby/csv/commit/5c6523da0a61. This introduces another pbolem. We should try again later. https://github.com/ruby/csv/commit/43a1d6fff1
This commit is contained in:
Родитель
002ce9f515
Коммит
22ef4f6445
|
@ -167,7 +167,6 @@ class CSV
|
|||
end
|
||||
|
||||
def keep_start
|
||||
adjust_last_keep
|
||||
@keeps.push([@scanner.pos, nil])
|
||||
end
|
||||
|
||||
|
@ -198,17 +197,7 @@ class CSV
|
|||
end
|
||||
|
||||
def keep_drop
|
||||
_, buffer = @keeps.pop
|
||||
return unless buffer
|
||||
|
||||
last_keep = @keeps.last
|
||||
return unless last_keep
|
||||
|
||||
if last_keep[1]
|
||||
last_keep[1] << buffer
|
||||
else
|
||||
last_keep[1] = buffer
|
||||
end
|
||||
@keeps.pop
|
||||
end
|
||||
|
||||
def rest
|
||||
|
@ -216,30 +205,24 @@ class CSV
|
|||
end
|
||||
|
||||
private
|
||||
def adjust_last_keep
|
||||
keep = @keeps.last
|
||||
return if keep.nil?
|
||||
|
||||
keep_start = keep[0]
|
||||
return if @scanner.pos == keep_start
|
||||
|
||||
string = @scanner.string
|
||||
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
|
||||
if keep_data
|
||||
keep_buffer = keep[1]
|
||||
if keep_buffer
|
||||
keep_buffer << keep_data
|
||||
else
|
||||
keep[1] = keep_data.dup
|
||||
end
|
||||
end
|
||||
keep[0] = 0
|
||||
end
|
||||
|
||||
def read_chunk
|
||||
return false if @last_scanner
|
||||
|
||||
adjust_last_keep
|
||||
unless @keeps.empty?
|
||||
keep = @keeps.last
|
||||
keep_start = keep[0]
|
||||
string = @scanner.string
|
||||
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
|
||||
if keep_data
|
||||
keep_buffer = keep[1]
|
||||
if keep_buffer
|
||||
keep_buffer << keep_data
|
||||
else
|
||||
keep[1] = keep_data.dup
|
||||
end
|
||||
end
|
||||
keep[0] = 0
|
||||
end
|
||||
|
||||
input = @inputs.first
|
||||
case input
|
||||
|
@ -746,26 +729,28 @@ class CSV
|
|||
sample[0, 128].index(@quote_character)
|
||||
end
|
||||
|
||||
class UnoptimizedStringIO # :nodoc:
|
||||
def initialize(string)
|
||||
@io = StringIO.new(string, "rb:#{string.encoding}")
|
||||
end
|
||||
|
||||
def gets(*args)
|
||||
@io.gets(*args)
|
||||
end
|
||||
|
||||
def each_line(*args, &block)
|
||||
@io.each_line(*args, &block)
|
||||
end
|
||||
|
||||
def eof?
|
||||
@io.eof?
|
||||
end
|
||||
end
|
||||
|
||||
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
|
||||
if SCANNER_TEST
|
||||
class UnoptimizedStringIO
|
||||
def initialize(string)
|
||||
@io = StringIO.new(string, "rb:#{string.encoding}")
|
||||
end
|
||||
|
||||
def gets(*args)
|
||||
@io.gets(*args)
|
||||
end
|
||||
|
||||
def each_line(*args, &block)
|
||||
@io.each_line(*args, &block)
|
||||
end
|
||||
|
||||
def eof?
|
||||
@io.eof?
|
||||
end
|
||||
end
|
||||
|
||||
SCANNER_TEST_CHUNK_SIZE =
|
||||
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
|
||||
def build_scanner
|
||||
inputs = @samples.collect do |sample|
|
||||
UnoptimizedStringIO.new(sample)
|
||||
|
@ -775,12 +760,10 @@ class CSV
|
|||
else
|
||||
inputs << @input
|
||||
end
|
||||
chunk_size =
|
||||
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
|
||||
InputsScanner.new(inputs,
|
||||
@encoding,
|
||||
@row_separator,
|
||||
chunk_size: chunk_size)
|
||||
chunk_size: SCANNER_TEST_CHUNK_SIZE)
|
||||
end
|
||||
else
|
||||
def build_scanner
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
require_relative "../helper"
|
||||
|
||||
class TestCSVParseInputsScanner < Test::Unit::TestCase
|
||||
include Helper
|
||||
|
||||
def test_keep_over_chunks_nested_back
|
||||
input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
|
||||
scanner = CSV::Parser::InputsScanner.new([input],
|
||||
Encoding::UTF_8,
|
||||
nil,
|
||||
chunk_size: 2)
|
||||
scanner.keep_start
|
||||
assert_equal("abc", scanner.scan_all(/[a-c]+/))
|
||||
scanner.keep_start
|
||||
assert_equal("def", scanner.scan_all(/[d-f]+/))
|
||||
scanner.keep_back
|
||||
scanner.keep_back
|
||||
assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
|
||||
end
|
||||
|
||||
|
||||
def test_keep_over_chunks_nested_drop_back
|
||||
input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
|
||||
scanner = CSV::Parser::InputsScanner.new([input],
|
||||
Encoding::UTF_8,
|
||||
nil,
|
||||
chunk_size: 3)
|
||||
scanner.keep_start
|
||||
assert_equal("ab", scanner.scan(/../))
|
||||
scanner.keep_start
|
||||
assert_equal("c", scanner.scan(/./))
|
||||
assert_equal("d", scanner.scan(/./))
|
||||
scanner.keep_drop
|
||||
scanner.keep_back
|
||||
assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
|
||||
end
|
||||
end
|
Загрузка…
Ссылка в новой задаче