[rubygems/rubygems] Add TarReader::Entry#seek to seek within the tar file entry

TarReader#each previously implemented a partial version of seek.
This code moved to Entry#seek for use from TarReader#each.

Entry#close now returns nil instead of true, like IO#close.

Closing an Entry now seeks to the end of the Entry, seeking past
any remaining zero byte tar file padding and moving the io to the
correcty position to read the next file in the archive.

Uses seek for Entry#rewind and #pos=, fixing the tar->gzip->tar nested
rewind that would break previous to this change.

Add Entry.open that behaves more like File.open.

https://github.com/rubygems/rubygems/commit/f5149565d5
This commit is contained in:
Martin Emde 2023-02-16 18:03:41 -08:00 коммит произвёл git
Родитель 719a7726d1
Коммит 85a1738ab3
6 изменённых файлов: 281 добавлений и 86 удалений

Просмотреть файл

@ -53,39 +53,11 @@ class Gem::Package::TarReader
def each
return enum_for __method__ unless block_given?
use_seek = @io.respond_to?(:seek)
until @io.eof? do
header = Gem::Package::TarHeader.from @io
return if header.empty?
entry = Gem::Package::TarReader::Entry.new header, @io
size = entry.header.size
yield entry
skip = (512 - (size % 512)) % 512
pending = size - entry.bytes_read
if use_seek
begin
# avoid reading if the @io supports seeking
@io.seek pending, IO::SEEK_CUR
pending = 0
rescue Errno::EINVAL
end
end
# if seeking isn't supported or failed
while pending > 0 do
bytes_read = @io.read([pending, 4096].min).size
raise UnexpectedEOF if @io.eof?
pending -= bytes_read
end
@io.read skip # discard trailing zeros
# make sure nobody can use #read, #getc or #rewind anymore
entry.close
end
end

Просмотреть файл

@ -8,6 +8,20 @@
# Class for reading entries out of a tar file
class Gem::Package::TarReader::Entry
##
# Creates a new tar entry for +header+ that will be read from +io+
# If a block is given, the entry is yielded and then closed.
def self.open(header, io, &block)
entry = new header, io
return entry unless block_given?
begin
yield entry
ensure
entry.close
end
end
##
# Header for this tar entry
@ -21,6 +35,7 @@ class Gem::Package::TarReader::Entry
@header = header
@io = io
@orig_pos = @io.pos
@end_pos = @orig_pos + @header.size
@read = 0
end
@ -39,7 +54,14 @@ class Gem::Package::TarReader::Entry
# Closes the tar entry
def close
return if closed?
# Seek to the end of the entry if it wasn't fully read
seek(0, IO::SEEK_END)
# discard trailing zeros
skip = (512 - (@header.size % 512)) % 512
@io.read(skip)
@closed = true
nil
end
##
@ -117,6 +139,14 @@ class Gem::Package::TarReader::Entry
bytes_read
end
##
# Seek to the position in the tar entry
def pos=(new_pos)
seek(new_pos, IO::SEEK_SET)
new_pos
end
def size
@header.size
end
@ -157,13 +187,62 @@ class Gem::Package::TarReader::Entry
outbuf
end
##
# Seeks to +offset+ bytes into the tar file entry
# +whence+ can be IO::SEEK_SET, IO::SEEK_CUR, or IO::SEEK_END
def seek(offset, whence = IO::SEEK_SET)
check_closed
new_pos =
case whence
when IO::SEEK_SET then @orig_pos + offset
when IO::SEEK_CUR then @io.pos + offset
when IO::SEEK_END then @end_pos + offset
else
raise ArgumentError, "invalid whence"
end
if new_pos < @orig_pos
new_pos = @orig_pos
elsif new_pos > @end_pos
new_pos = @end_pos
end
pending = new_pos - @io.pos
if @io.respond_to?(:seek)
begin
# avoid reading if the @io supports seeking
@io.seek new_pos, IO::SEEK_SET
pending = 0
rescue Errno::EINVAL
end
end
# if seeking isn't supported or failed
# negative seek requires that we rewind and read
if pending < 0
@io.rewind
pending = new_pos
end
while pending > 0 do
size_read = @io.read([pending, 4096].min).size
raise UnexpectedEOF if @io.eof?
pending -= size_read
end
@read = @io.pos - @orig_pos
0
end
##
# Rewinds to the beginning of the tar file entry
def rewind
check_closed
@io.pos = @orig_pos
@read = 0
seek(0, IO::SEEK_SET)
end
end

Просмотреть файл

@ -90,31 +90,36 @@ class Gem::Package::TarTestCase < Gem::TestCase
ASCIIZ("wheel", 32), # char gname[32]; ASCIIZ
Z(to_oct(0, 7)), # char devmajor[8]; 0 padded, octal, null
Z(to_oct(0, 7)), # char devminor[8]; 0 padded, octal, null
ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled)
ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled)
]
h = arr.join
ret = h + "\0" * (512 - h.size)
ret = ASCIIZ(h, 512)
assert_equal(512, ret.size)
ret
end
def tar_dir_header(name, prefix, mode, mtime)
h = header("5", name, prefix, 0, mode, mtime)
def header_with_checksum(type, fname, dname, length, mode, mtime, linkname = "")
h = header(type, fname, dname, length, mode, mtime, nil, linkname)
checksum = calc_checksum(h)
header("5", name, prefix, 0, mode, mtime, checksum)
header(type, fname, dname, length, mode, mtime, checksum, linkname)
end
def tar_dir_header(name, prefix, mode, mtime)
header_with_checksum("5", name, prefix, 0, mode, mtime)
end
def tar_file_header(fname, dname, mode, length, mtime)
h = header("0", fname, dname, length, mode, mtime)
checksum = calc_checksum(h)
header("0", fname, dname, length, mode, mtime, checksum)
header_with_checksum("0", fname, dname, length, mode, mtime)
end
def tar_symlink_header(fname, prefix, mode, mtime, linkname)
h = header("2", fname, prefix, 0, mode, mtime, nil, linkname)
checksum = calc_checksum(h)
header("2", fname, prefix, 0, mode, mtime, checksum, linkname)
def tar_symlink_header(fname, dname, mode, mtime, linkname)
header_with_checksum("2", fname, dname, 0, mode, mtime, linkname)
end
def tar_file_contents(content)
pad = (512 - (content.size % 512)) % 512
content + "\0" * pad
end
def to_oct(n, pad_size)
@ -122,11 +127,15 @@ class Gem::Package::TarTestCase < Gem::TestCase
end
def util_entry(tar)
io = TempIO.new tar
io = tar.respond_to?(:read) ? tar : TempIO.new(tar)
header = Gem::Package::TarHeader.from io
Gem::Package::TarReader::Entry.new header, io
Gem::Package::TarReader::Entry.open header, io
end
def close_util_entry(entry)
entry.instance_variable_get(:@io).close!
end
def util_dir_entry
@ -136,4 +145,30 @@ class Gem::Package::TarTestCase < Gem::TestCase
def util_symlink_entry
util_entry tar_symlink_header("foo", "bar", 0, Time.now, "link")
end
def util_tar(&block)
tar_io = StringIO.new
Gem::Package::TarWriter.new(tar_io, &block)
tar_io.rewind
tar_io
end
def util_tar_gz(&block)
tar_io = util_tar(&block)
StringIO.new util_gzip(tar_io.string)
end
def util_gem_data_tar(spec = nil, &block)
data_tgz = util_tar_gz(&block)
util_tar do |tar|
if spec
tar.add_file "metadata.gz", 0444 do |io|
io.write util_gzip(spec.to_yaml)
end
end
tar.add_file "data.tar.gz", 0644 do |io|
io.write data_tgz.string
end
end
end
end

Просмотреть файл

@ -1187,29 +1187,4 @@ class TestGemPackage < Gem::Package::TarTestCase
assert_equal %w[lib/code.rb], package.contents
end
def util_tar
tar_io = StringIO.new
Gem::Package::TarWriter.new tar_io do |tar|
yield tar
end
tar_io.rewind
tar_io
end
def util_tar_gz(&block)
tar_io = util_tar(&block)
tgz_io = StringIO.new
# can't wrap TarWriter because it seeks
Zlib::GzipWriter.wrap tgz_io do |io|
io.write tar_io.string
end
StringIO.new tgz_io.string
end
end

Просмотреть файл

@ -56,12 +56,14 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase
io = TempIO.new tar
Gem::Package::TarReader.new io do |tar_reader|
tar_reader.seek "baz/bar" do |entry|
retval = tar_reader.seek "baz/bar" do |entry|
assert_kind_of Gem::Package::TarReader::Entry, entry
assert_equal "baz/bar", entry.full_name
entry.read
end
assert_equal "", retval
assert_equal 0, io.pos
end
ensure
@ -84,4 +86,49 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase
ensure
io.close!
end
def test_read_in_gem_data
gem_tar = util_gem_data_tar do |tar|
tar.add_file "lib/code.rb", 0444 do |io|
io.write "# lib/code.rb"
end
end
count = 0
Gem::Package::TarReader.new(gem_tar).each do |entry|
next unless entry.full_name == "data.tar.gz"
Zlib::GzipReader.wrap entry do |gzio|
Gem::Package::TarReader.new(gzio).each do |contents_entry|
assert_equal "# lib/code.rb", contents_entry.read
count += 1
end
end
end
assert_equal 1, count, "should have found one file"
end
def test_seek_in_gem_data
gem_tar = util_gem_data_tar do |tar|
tar.add_file "lib/code.rb", 0444 do |io|
io.write "# lib/code.rb"
end
tar.add_file "lib/foo.rb", 0444 do |io|
io.write "# lib/foo.rb"
end
end
count = 0
Gem::Package::TarReader.new(gem_tar).seek("data.tar.gz") do |entry|
Zlib::GzipReader.wrap entry do |gzio|
Gem::Package::TarReader.new(gzio).seek("lib/foo.rb") do |contents_entry|
assert_equal "# lib/foo.rb", contents_entry.read
count += 1
end
end
end
assert_equal 1, count, "should have found one file"
end
end

Просмотреть файл

@ -10,8 +10,7 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
@tar = String.new
@tar << tar_file_header("lib/foo", "", 0, @contents.size, Time.now)
@tar << @contents
@tar << "\0" * (512 - (@tar.size % 512))
@tar << tar_file_contents(@contents)
@entry = util_entry @tar
end
@ -21,8 +20,35 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
super
end
def close_util_entry(entry)
entry.instance_variable_get(:@io).close!
def test_open
io = TempIO.new @tar
header = Gem::Package::TarHeader.from io
retval = Gem::Package::TarReader::Entry.open header, io do |entry|
entry.getc
end
assert_equal "a", retval
assert_equal @tar.size, io.pos, "should have read to end of entry"
end
def test_open_closes_entry
io = TempIO.new @tar
header = Gem::Package::TarHeader.from io
entry = nil
Gem::Package::TarReader::Entry.open header, io do |e|
entry = e
end
assert entry.closed?
assert_raise(IOError) { entry.getc }
end
def test_open_returns_entry
io = TempIO.new @tar
header = Gem::Package::TarHeader.from io
entry = Gem::Package::TarReader::Entry.open header, io
refute entry.closed?
assert_equal ?a, entry.getc
assert_nil entry.close
assert entry.closed?
end
def test_bytes_read
@ -177,6 +203,21 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
assert_equal char, @entry.getc
end
def test_seek
@entry.seek(50)
assert_equal 50, @entry.pos
assert_equal @contents[50..-1], @entry.read, "read remaining after seek"
@entry.seek(-50, IO::SEEK_CUR)
assert_equal @contents.size - 50, @entry.pos
assert_equal @contents[-50..-1], @entry.read, "read after stepping back 50 from the end"
@entry.seek(0, IO::SEEK_SET)
assert_equal 0, @entry.pos
assert_equal @contents, @entry.read, "read from beginning"
@entry.seek(-10, IO::SEEK_END)
assert_equal @contents.size - 10, @entry.pos
assert_equal @contents[-10..-1], @entry.read, "read from end"
end
def test_read_zero
expected = StringIO.new("")
assert_equal expected.read(0), @entry.read(0)
@ -187,28 +228,74 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
assert_equal expected.readpartial(0), @entry.readpartial(0)
end
def util_zero_byte_entry
tar = String.new
tar << tar_file_header("lib/empty", "", 0, 0, Time.now)
tar << "\0" * (512 - (tar.size % 512))
util_entry tar
end
def test_zero_byte_file_read
zero_entry = util_zero_byte_entry
zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now))
expected = StringIO.new("")
assert_equal expected.read, zero_entry.read
ensure
close_util_entry(zero_entry) if zero_entry
end
def test_zero_byte_file_readpartial
zero_entry = util_zero_byte_entry
zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now))
expected = StringIO.new("")
assert_equal expected.readpartial(0), zero_entry.readpartial(0)
ensure
close_util_entry(zero_entry) if zero_entry
end
def test_read_from_gzip_io
tgz = util_gzip(@tar)
Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
entry = util_entry(gzio)
assert_equal @contents, entry.read
entry.rewind
assert_equal @contents, entry.read, "second read after rewind should read same contents"
end
end
def test_read_from_gzip_io_with_non_zero_offset
contents2 = ("0".."9").to_a.join * 100
@tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now)
@tar << tar_file_contents(contents2)
tgz = util_gzip(@tar)
Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
util_entry(gzio).close # skip the first entry so io.pos is not 0, preventing easy rewind
entry = util_entry(gzio)
assert_equal contents2, entry.read
entry.rewind
assert_equal contents2, entry.read, "second read after rewind should read same contents"
end
end
def test_seek_in_gzip_io_with_non_zero_offset
contents2 = ("0".."9").to_a.join * 100
@tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now)
@tar << tar_file_contents(contents2)
tgz = util_gzip(@tar)
Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
util_entry(gzio).close # skip the first entry so io.pos is not 0
entry = util_entry(gzio)
entry.seek(50)
assert_equal 50, entry.pos
assert_equal contents2[50..-1], entry.read, "read remaining after seek"
entry.seek(-50, IO::SEEK_CUR)
assert_equal contents2.size - 50, entry.pos
assert_equal contents2[-50..-1], entry.read, "read after stepping back 50 from the end"
entry.seek(0, IO::SEEK_SET)
assert_equal 0, entry.pos
assert_equal contents2, entry.read, "read from beginning"
entry.seek(-10, IO::SEEK_END)
assert_equal contents2.size - 10, entry.pos
assert_equal contents2[-10..-1], entry.read, "read from end"
assert_equal contents2.size, entry.pos
end
end
end