From 1a853390ee08af1b8ff3d1882a8762155d151306 Mon Sep 17 00:00:00 2001 From: drbrain Date: Tue, 10 Jul 2012 03:51:25 +0000 Subject: [PATCH] * ext/zlib/zlib.c: Revert r36349. Added streaming support to inflate processing. rb_block_given_p() is not callable without the GVL. * ext/zlib/extconf.rb: ditto * NEWS: ditto * test/zlib/test_zlib.rb: ditto git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36354 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 21 ----- NEWS | 2 - ext/zlib/extconf.rb | 3 +- ext/zlib/zlib.c | 183 ++++++++++++----------------------------- test/zlib/test_zlib.rb | 153 ---------------------------------- 5 files changed, 53 insertions(+), 309 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2284fcf813..0019976704 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,27 +6,6 @@ Tue Jul 10 10:43:37 2012 NAKAMURA Usaku * configure.in: removed --enable/disable-win95 options. (see r36432) -Tue Jul 10 08:56:17 2012 Eric Hodel - - * ext/zlib/zlib.c: Added streaming support to inflate processing. - This allows zlib streams to be processed without huge memory growth. - [Feature #6612] - * NEWS: ditto - * ext/zlib/zlib.c (zstream_expand_buffer): Uses rb_yield when a block - is given for streaming support. Refactored to use - zstream_expand_buffer_into to remove duplicate code. - * ext/zlib/zlib.c (zstream_expand_buffer_protect): Added wrapper - function to pass jump state back through GVL-free section to allow - zstream clean-up before terminating the ruby call. - * ext/zlib/zlib.c (zstream_expand_buffer_without_gvl): Acquire GVL to - yield processed chunk of output stream. - * ext/zlib/zlib.c (zstream_detach_buffer): When a block is given, - returns Qnil mid-stream and yields the output buffer at the end of - the stream. - * ext/zlib/extconf.rb: Update INCFLAGS to find internal.h for - rb_thread_call_with_gvl - * test/zlib/test_zlib.rb: Added tests for streaming processing - Tue Jul 10 00:44:41 2012 KOSAKI Motohiro * include/ruby/ruby.h: Removed RUBY_GLOBAL_SETUP complely. It is diff --git a/NEWS b/NEWS index 7c356b9352..b1c74ac05e 100644 --- a/NEWS +++ b/NEWS @@ -123,8 +123,6 @@ with all sufficient information, see the ChangeLog file. accessible from other users. * zlib - * Added streaming support for Zlib::Inflate and Zlib::Deflate. This allows - processing of a stream without the use of large amounts of memory. * Added support for the new deflate strategies Zlib::RLE and Zlib::FIXED. * Zlib streams are now processed without the GVL. This allows gzip, zlib and deflate streams to be processed in parallel. diff --git a/ext/zlib/extconf.rb b/ext/zlib/extconf.rb index c08dba7206..c49c29d463 100644 --- a/ext/zlib/extconf.rb +++ b/ext/zlib/extconf.rb @@ -7,10 +7,9 @@ require 'mkmf' require 'rbconfig' -$INCFLAGS << " -I$(topdir) -I$(top_srcdir)" - dir_config 'zlib' + if %w'z libz zlib1 zlib zdll'.find {|z| have_library(z, 'deflateReset')} and have_header('zlib.h') then diff --git a/ext/zlib/zlib.c b/ext/zlib/zlib.c index f4eb0f813b..fc49e4454a 100644 --- a/ext/zlib/zlib.c +++ b/ext/zlib/zlib.c @@ -10,7 +10,6 @@ #include #include #include -#include "internal.h" #ifdef HAVE_VALGRIND_MEMCHECK_H # include @@ -545,17 +544,12 @@ struct zstream { #define ZSTREAM_FLAG_IN_STREAM 0x2 #define ZSTREAM_FLAG_FINISHED 0x4 #define ZSTREAM_FLAG_CLOSING 0x8 -#define ZSTREAM_FLAG_GZFILE 0x10 /* disallows yield from expand_buffer for - gzip*/ -#define ZSTREAM_FLAG_UNUSED 0x20 +#define ZSTREAM_FLAG_UNUSED 0x10 #define ZSTREAM_READY(z) ((z)->flags |= ZSTREAM_FLAG_READY) #define ZSTREAM_IS_READY(z) ((z)->flags & ZSTREAM_FLAG_READY) #define ZSTREAM_IS_FINISHED(z) ((z)->flags & ZSTREAM_FLAG_FINISHED) #define ZSTREAM_IS_CLOSING(z) ((z)->flags & ZSTREAM_FLAG_CLOSING) -#define ZSTREAM_IS_GZFILE(z) ((z)->flags & ZSTREAM_FLAG_GZFILE) - -#define ZSTREAM_EXPAND_BUFFER_OK 0 /* I think that more better value should be found, but I gave up finding it. B) */ @@ -574,9 +568,8 @@ static const struct zstream_funcs inflate_funcs = { struct zstream_run_args { struct zstream * z; - int flush; /* stream flush value for inflate() or deflate() */ - int interrupt; /* stop processing the stream and return to ruby */ - int jump_state; /* for buffer expansion block break or exception */ + int flush; + int interrupt; }; static voidpf @@ -621,50 +614,33 @@ zstream_init(struct zstream *z, const struct zstream_funcs *func) static void zstream_expand_buffer(struct zstream *z) { + long inc; + if (NIL_P(z->buf)) { - zstream_expand_buffer_into(z, ZSTREAM_INITIAL_BUFSIZE); + /* I uses rb_str_new here not rb_str_buf_new because + rb_str_buf_new makes a zero-length string. */ + z->buf = rb_str_new(0, ZSTREAM_INITIAL_BUFSIZE); + z->buf_filled = 0; + z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf); + z->stream.avail_out = ZSTREAM_INITIAL_BUFSIZE; + RBASIC(z->buf)->klass = 0; return; } - if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) { - if (z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { - int state = 0; - VALUE self = (VALUE)z->stream.opaque; - - rb_str_resize(z->buf, z->buf_filled); - RBASIC(z->buf)->klass = rb_cString; - OBJ_INFECT(z->buf, self); - - rb_protect(rb_yield, z->buf, &state); - - z->buf = Qnil; - zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX); - - if (state) - rb_jump_tag(state); - - return; - } - else { - zstream_expand_buffer_into(z, - ZSTREAM_AVAIL_OUT_STEP_MAX - z->buf_filled); - } + if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { + /* to keep other threads from freezing */ + z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; } else { - if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { - z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; + inc = z->buf_filled / 2; + if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) { + inc = ZSTREAM_AVAIL_OUT_STEP_MIN; } - else { - long inc = z->buf_filled / 2; - if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) { - inc = ZSTREAM_AVAIL_OUT_STEP_MIN; - } - rb_str_resize(z->buf, z->buf_filled + inc); - z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ? - (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX; - } - z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; + rb_str_resize(z->buf, z->buf_filled + inc); + z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ? + (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX; } + z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; } static void @@ -686,27 +662,13 @@ zstream_expand_buffer_into(struct zstream *z, unsigned long size) } } -static void * -zstream_expand_buffer_protect(void *ptr) -{ - struct zstream *z = (struct zstream *)ptr; - int state = 0; - - rb_protect((VALUE (*)(VALUE))zstream_expand_buffer, (VALUE)z, &state); - - return (void *)state; -} - static int zstream_expand_buffer_without_gvl(struct zstream *z) { char * new_str; long inc, len; - if (rb_block_given_p()) { - return (int)rb_thread_call_with_gvl(zstream_expand_buffer_protect, (void *)z); - } - else if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { + if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; } else { @@ -719,6 +681,9 @@ zstream_expand_buffer_without_gvl(struct zstream *z) new_str = ruby_xrealloc(RSTRING(z->buf)->as.heap.ptr, len + 1); + if (!new_str) + return 0; + /* from rb_str_resize */ RSTRING(z->buf)->as.heap.ptr = new_str; RSTRING(z->buf)->as.heap.ptr[len] = '\0'; /* sentinel */ @@ -730,7 +695,7 @@ zstream_expand_buffer_without_gvl(struct zstream *z) } z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; - return ZSTREAM_EXPAND_BUFFER_OK; + return 1; } static void @@ -771,13 +736,6 @@ zstream_detach_buffer(struct zstream *z) { VALUE dst, self = (VALUE)z->stream.opaque; - if (!ZSTREAM_IS_FINISHED(z) && !ZSTREAM_IS_GZFILE(z) && - rb_block_given_p()) { - /* prevent tiny yields mid-stream, save for next - * zstream_expand_buffer() or stream end */ - return Qnil; - } - if (NIL_P(z->buf)) { dst = rb_str_new(0, 0); } @@ -793,12 +751,6 @@ zstream_detach_buffer(struct zstream *z) z->buf_filled = 0; z->stream.next_out = 0; z->stream.avail_out = 0; - - if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) { - rb_yield(dst); - dst = Qnil; - } - return dst; } @@ -968,7 +920,7 @@ static VALUE zstream_run_func(void *ptr) { struct zstream_run_args *args = (struct zstream_run_args *)ptr; - int err, state, flush = args->flush; + int err, flush = args->flush; struct zstream *z = args->z; uInt n; @@ -992,11 +944,8 @@ zstream_run_func(void *ptr) break; } - state = zstream_expand_buffer_without_gvl(z); - - if (state) { - err = Z_OK; /* buffer expanded but stream processing was stopped */ - args->jump_state = state; + if (!zstream_expand_buffer_without_gvl(z)) { + err = Z_MEM_ERROR; /* realloc failed */ break; } } @@ -1025,7 +974,6 @@ zstream_run(struct zstream *z, Bytef *src, long len, int flush) args.z = z; args.flush = flush; args.interrupt = 0; - args.jump_state = 0; if (NIL_P(z->input) && len == 0) { z->stream.next_in = (Bytef*)""; @@ -1077,9 +1025,6 @@ loop: zstream_append_input(z, z->stream.next_in, z->stream.avail_in); guard = Qnil; /* prevent tail call to make guard effective */ } - - if (args.jump_state) - rb_jump_tag(args.jump_state); } static VALUE @@ -1262,13 +1207,8 @@ rb_zstream_reset(VALUE obj) } /* - * call-seq: - * finish -> String - * finish { |chunk| ... } -> nil - * - * Finishes the stream and flushes output buffer. If a block is given each - * chunk is yielded to the block until the input buffer has been flushed to - * the output buffer. + * Finishes the stream and flushes output buffer. See Zlib::Deflate#finish and + * Zlib::Inflate#finish for details of this behavior. */ static VALUE rb_zstream_finish(VALUE obj) @@ -1281,13 +1221,7 @@ rb_zstream_finish(VALUE obj) } /* - * call-seq: - * flush_next_out -> String - * flush_next_out { |chunk| ... } -> nil - * - * Flushes output buffer and returns all data in that buffer. If a block is - * given each chunk is yielded to the block until the current output buffer - * has been flushed. + * Flushes input buffer and returns all data in that buffer. */ static VALUE rb_zstream_flush_next_in(VALUE obj) @@ -1569,13 +1503,13 @@ deflate_run(VALUE args) /* * Document-method: Zlib::Deflate.deflate * - * call-seq: - * Zlib.deflate(string[, level]) - * Zlib::Deflate.deflate(string[, level]) + * call-seq: Zlib.deflate(string[, level]) + * Zlib::Deflate.deflate(string[, level]) * * Compresses the given +string+. Valid values of level are - * Zlib::NO_COMPRESSION, Zlib::BEST_SPEED, Zlib::BEST_COMPRESSION, - * Zlib::DEFAULT_COMPRESSION, or an integer from 0 to 9 (the default is 6). + * NO_COMPRESSION, BEST_SPEED, + * BEST_COMPRESSION, DEFAULT_COMPRESSION, and an + * integer from 0 to 9 (the default is 6). * * This method is almost equivalent to the following code: * @@ -1629,19 +1563,17 @@ do_deflate(struct zstream *z, VALUE src, int flush) } /* - * Document-method: Zlib::Deflate#deflate + * Document-method: Zlib#deflate * * call-seq: - * z.deflate(string, flush = Zlib::NO_FLUSH) -> String - * z.deflate(string, flush = Zlib::NO_FLUSH) { |chunk| ... } -> nil + * deflate(string, flush = Zlib::NO_FLUSH) * * Inputs +string+ into the deflate stream and returns the output from the * stream. On calling this method, both the input and the output buffers of - * the stream are flushed. If +string+ is nil, this method finishes the - * stream, just like Zlib::ZStream#finish. + * the stream are flushed. * - * If a block is given consecutive deflated chunks from the +string+ are - * yielded to the block and +nil+ is returned. + * If +string+ is nil, this method finishes the stream, just like + * Zlib::ZStream#finish. * * The +flush+ parameter specifies the flush mode. The following constants * may be used: @@ -1688,13 +1620,10 @@ rb_deflate_addstr(VALUE obj, VALUE src) * Document-method: Zlib::Deflate#flush * * call-seq: - * flush(flush = Zlib::SYNC_FLUSH) -> String - * flush(flush = Zlib::SYNC_FLUSH) { |chunk| ... } -> nil + * flush(flush = Zlib::SYNC_FLUSH) * * This method is equivalent to deflate('', flush). This method is - * just provided to improve the readability of your Ruby program. If a block - * is given chunks of deflate output are yielded to the block until the buffer - * is flushed. + * just provided to improve the readability of your Ruby program. * * See Zlib::Deflate#deflate for detail on the +flush+ constants NO_FLUSH, * SYNC_FLUSH, FULL_FLUSH and FINISH. @@ -1882,11 +1811,9 @@ inflate_run(VALUE args) } /* - * Document-method: Zlib::inflate + * Document-method: Zlib::Inflate.inflate * - * call-seq: - * Zlib.inflate(string) - * Zlib::Inflate.inflate(string) + * call-seq: Zlib::Inflate.inflate(string) * * Decompresses +string+. Raises a Zlib::NeedDict exception if a preset * dictionary is needed for decompression. @@ -1962,17 +1889,12 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) { /* * Document-method: Zlib::Inflate#inflate * - * call-seq: - * inflate(deflate_string) -> String - * inflate(deflate_string) { |chunk| ... } -> nil + * call-seq: inflate(string) * - * Inputs +deflate_string+ into the inflate stream and returns the output from - * the stream. Calling this method, both the input and the output buffer of - * the stream are flushed. If string is +nil+, this method finishes the - * stream, just like Zlib::ZStream#finish. - * - * If a block is given consecutive inflated chunks from the +deflate_string+ - * are yielded to the block and +nil+ is returned. + * Inputs +string+ into the inflate stream and returns the output from the + * stream. Calling this method, both the input and the output buffer of the + * stream are flushed. If string is +nil+, this method finishes the stream, + * just like Zlib::ZStream#finish. * * Raises a Zlib::NeedDict exception if a preset dictionary is needed to * decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then @@ -2246,7 +2168,6 @@ gzfile_new(klass, funcs, endfunc) obj = Data_Make_Struct(klass, struct gzfile, gzfile_mark, gzfile_free, gz); zstream_init(&gz->z, funcs); - gz->z.flags |= ZSTREAM_FLAG_GZFILE; gz->io = Qnil; gz->level = 0; gz->mtime = 0; diff --git a/test/zlib/test_zlib.rb b/test/zlib/test_zlib.rb index aa3486359c..61e24e4d44 100644 --- a/test/zlib/test_zlib.rb +++ b/test/zlib/test_zlib.rb @@ -39,62 +39,6 @@ if defined? Zlib assert_raise(Zlib::StreamError) { Zlib::Deflate.deflate("foo", 10000) } end - def test_deflate_chunked - original = '' - chunks = [] - r = Random.new 0 - - z = Zlib::Deflate.new - - 2.times do - input = r.bytes(20000) - original << input - z.deflate(input) do |chunk| - chunks << chunk - end - end - - assert_equal [16384, 16384], - chunks.map { |chunk| chunk.length } - - final = z.finish - - assert_equal 7253, final.length - - chunks << final - all = chunks.join - - inflated = Zlib.inflate all - - assert_equal original, inflated - end - - def test_deflate_chunked_break - chunks = [] - r = Random.new 0 - - z = Zlib::Deflate.new - - input = r.bytes(20000) - z.deflate(input) do |chunk| - chunks << chunk - break - end - - assert_equal [16384], chunks.map { |chunk| chunk.length } - - final = z.finish - - assert_equal 3632, final.length - - all = chunks.join - all << final - - original = Zlib.inflate all - - assert_equal input, original - end - def test_addstr z = Zlib::Deflate.new z << "foo" @@ -258,38 +202,6 @@ if defined? Zlib assert_equal "foofoofoo", out end - def test_finish_chunked - # zeros = Zlib::Deflate.deflate("0" * 100_000) - zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ - "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\257\006\351\247BH" - - chunks = [] - - z = Zlib::Inflate.new - - z.inflate(zeros) do |chunk| - chunks << chunk - break - end - - z.finish do |chunk| - chunks << chunk - end - - assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696], - chunks.map { |chunk| chunk.size } - - assert chunks.all? { |chunk| - chunk =~ /\A0+\z/ - } - end - def test_inflate s = Zlib::Deflate.deflate("foo") z = Zlib::Inflate.new @@ -319,58 +231,6 @@ if defined? Zlib assert_equal "\0", inflated end - def test_inflate_chunked - # s = Zlib::Deflate.deflate("0" * 100_000) - zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ - "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\257\006\351\247BH" - - chunks = [] - - z = Zlib::Inflate.new - - z.inflate(zeros) do |chunk| - chunks << chunk - end - - assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696], - chunks.map { |chunk| chunk.size } - - assert chunks.all? { |chunk| - chunk =~ /\A0+\z/ - } - end - - def test_inflate_chunked_break - # zeros = Zlib::Deflate.deflate("0" * 100_000) - zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ - "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\257\006\351\247BH" - - chunks = [] - - z = Zlib::Inflate.new - - z.inflate(zeros) do |chunk| - chunks << chunk - break - end - - out = z.inflate nil - - assert_equal 100_000 - chunks.first.length, out.length - end - def test_inflate_dictionary dictionary = "foo" @@ -1036,18 +896,5 @@ if defined? Zlib def test_deflate TestZlibDeflate.new(__name__).test_deflate end - - def test_deflate_stream - r = Random.new 0 - - deflated = '' - - Zlib.deflate(r.bytes(20000)) do |chunk| - deflated << chunk - end - - assert_equal 20016, deflated.length - end - end end