/********************************************************************** pack.c - $Author$ created at: Thu Feb 10 15:17:05 JST 1994 Copyright (C) 1993-2007 Yukihiro Matsumoto **********************************************************************/ #include "ruby/internal/config.h" #include #include #include #include #include "internal.h" #include "internal/array.h" #include "internal/bits.h" #include "internal/string.h" #include "internal/symbol.h" #include "internal/variable.h" #include "ruby/util.h" #include "builtin.h" /* * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG * instead of HAVE_LONG_LONG or LONG_LONG. * This means q! and Q! means always the standard long long type and * causes ArgumentError for platforms which has no long long type, * even if the platform has an implementation specific 64bit type. * This behavior is consistent with the document of pack/unpack. */ #ifdef HAVE_TRUE_LONG_LONG static const char natstr[] = "sSiIlLqQjJ"; # define endstr natstr #else static const char natstr[] = "sSiIlLjJ"; static const char endstr[] = "sSiIlLqQjJ"; #endif #ifdef HAVE_TRUE_LONG_LONG /* It is intentional to use long long instead of LONG_LONG. */ # define NATINT_LEN_Q NATINT_LEN(long long, 8) #else # define NATINT_LEN_Q 8 #endif #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8) # define NATINT_PACK #endif #ifdef DYNAMIC_ENDIAN /* for universal binary of NEXTSTEP and MacOS X */ /* useless since autoconf 2.63? */ static int is_bigendian(void) { static int init = 0; static int endian_value; char *p; if (init) return endian_value; init = 1; p = (char*)&init; return endian_value = p[0]?0:1; } # define BIGENDIAN_P() (is_bigendian()) #elif defined(WORDS_BIGENDIAN) # define BIGENDIAN_P() 1 #else # define BIGENDIAN_P() 0 #endif #ifdef NATINT_PACK # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len)) #else # define NATINT_LEN(type,len) ((int)sizeof(type)) #endif typedef union { float f; uint32_t u; char buf[4]; } FLOAT_SWAPPER; typedef union { double d; uint64_t u; char buf[8]; } DOUBLE_SWAPPER; #define swapf(x) swap32(x) #define swapd(x) swap64(x) #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x)) #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x)) #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x)) #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x)) #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x)) #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x)) #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x)) #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x)) #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x; #define HTONF(x) ((x).u = rb_htonf((x).u)) #define HTOVF(x) ((x).u = rb_htovf((x).u)) #define NTOHF(x) ((x).u = rb_ntohf((x).u)) #define VTOHF(x) ((x).u = rb_vtohf((x).u)) #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x; #define HTOND(x) ((x).u = rb_htond((x).u)) #define HTOVD(x) ((x).u = rb_htovd((x).u)) #define NTOHD(x) ((x).u = rb_ntohd((x).u)) #define VTOHD(x) ((x).u = rb_vtohd((x).u)) #define MAX_INTEGER_PACK_SIZE 8 static const char toofew[] = "too few arguments"; static void encodes(VALUE,const char*,long,int,int); static void qpencode(VALUE,VALUE,long); static unsigned long utf8_to_uv(const char*,long*); static ID id_associated; static void str_associate(VALUE str, VALUE add) { /* assert(NIL_P(rb_attr_get(str, id_associated))); */ rb_ivar_set(str, id_associated, add); } static VALUE str_associated(VALUE str) { VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse); if (!associates) rb_raise(rb_eArgError, "no associated pointer"); return associates; } static VALUE associated_pointer(VALUE associates, const char *t) { const VALUE *p = RARRAY_CONST_PTR(associates); const VALUE *pend = p + RARRAY_LEN(associates); for (; p < pend; p++) { VALUE tmp = *p; if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp; } rb_raise(rb_eArgError, "non associated pointer"); UNREACHABLE_RETURN(Qnil); } RBIMPL_ATTR_NORETURN() static void unknown_directive(const char *mode, char type, VALUE fmt) { char unknown[5]; if (ISPRINT(type)) { unknown[0] = type; unknown[1] = '\0'; } else { snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff); } fmt = rb_str_quote_unprintable(fmt); rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'", mode, unknown, fmt); } static float VALUE_to_float(VALUE obj) { VALUE v = rb_to_float(obj); double d = RFLOAT_VALUE(v); if (isnan(d)) { return NAN; } else if (d < -FLT_MAX) { return -INFINITY; } else if (d <= FLT_MAX) { return d; } else { return INFINITY; } } static void str_expand_fill(VALUE res, int c, long len) { long olen = RSTRING_LEN(res); memset(RSTRING_PTR(res) + olen, c, len); rb_str_set_len(res, olen + len); } static char * skip_to_eol(const char *p, const char *pend) { p = memchr(p, '\n', pend - p); return (char *)(p ? p + 1 : pend); } #define skip_blank(p, type) \ (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1))) #ifndef NATINT_PACK # define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e) #endif static char * pack_modifiers(const char *p, char type, int *natint, int *explicit_endian) { while (1) { switch (*p) { case '_': case '!': if (strchr(natstr, type)) { #ifdef NATINT_PACK *natint = 1; #endif p++; } else { rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); } break; case '<': case '>': if (!strchr(endstr, type)) { rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); } if (*explicit_endian) { rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); } *explicit_endian = *p++; break; default: return (char *)p; } } } static VALUE pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) { const char *p, *pend; VALUE res, from, associates = 0; long len, idx, plen; const char *ptr; int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ int integer_size, bigendian_p; StringValue(fmt); rb_must_asciicompat(fmt); p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); if (NIL_P(buffer)) { res = rb_str_buf_new(0); } else { if (!RB_TYPE_P(buffer, T_STRING)) rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer)); rb_str_modify(buffer); res = buffer; } idx = 0; #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) #define MORE_ITEM (idx < RARRAY_LEN(ary)) #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW) #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW) while (p < pend) { int explicit_endian = 0; if (RSTRING_END(fmt) != pend) { rb_raise(rb_eRuntimeError, "format string modified"); } const char type = *p++; /* get data type */ #ifdef NATINT_PACK int natint = 0; /* native integer */ #endif if (skip_blank(p, type)) continue; p = pack_modifiers(p, type, &natint, &explicit_endian); if (*p == '*') { /* set data length */ len = strchr("@Xxu", type) ? 0 : strchr("PMm", type) ? 1 : RARRAY_LEN(ary) - idx; p++; } else if (ISDIGIT(*p)) { errno = 0; len = STRTOUL(p, (char**)&p, 10); if (errno) { rb_raise(rb_eRangeError, "pack length too big"); } } else { len = 1; } switch (type) { case 'U': /* if encoding is US-ASCII, upgrade to UTF-8 */ if (enc_info == 1) enc_info = 2; break; case 'm': case 'M': case 'u': /* keep US-ASCII (do nothing) */ break; default: /* fall back to BINARY */ enc_info = 0; break; } switch (type) { case 'A': case 'a': case 'Z': case 'B': case 'b': case 'H': case 'h': from = NEXTFROM; if (NIL_P(from)) { ptr = ""; plen = 0; } else { StringValue(from); ptr = RSTRING_PTR(from); plen = RSTRING_LEN(from); } if (p[-1] == '*') len = plen; switch (type) { case 'a': /* arbitrary binary string (null padded) */ case 'A': /* arbitrary binary string (ASCII space padded) */ case 'Z': /* null terminated string */ if (plen >= len) { rb_str_buf_cat(res, ptr, len); if (p[-1] == '*' && type == 'Z') rb_str_buf_cat(res, "", 1); } else { rb_str_modify_expand(res, len); rb_str_buf_cat(res, ptr, plen); str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen); } break; #define castchar(from) (char)((from) & 0xff) case 'b': /* bit string (ascending) */ { int byte = 0; long i, j = 0; if (len > plen) { j = (len - plen + 1)/2; len = plen; } for (i=0; i++ < len; ptr++) { if (*ptr & 1) byte |= 128; if (i & 7) byte >>= 1; else { char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } } if (len & 7) { char c; byte >>= 7 - (len & 7); c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; goto grow; } break; case 'B': /* bit string (descending) */ { int byte = 0; long i, j = 0; if (len > plen) { j = (len - plen + 1)/2; len = plen; } for (i=0; i++ < len; ptr++) { byte |= *ptr & 1; if (i & 7) byte <<= 1; else { char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } } if (len & 7) { char c; byte <<= 7 - (len & 7); c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; goto grow; } break; case 'h': /* hex string (low nibble first) */ { int byte = 0; long i, j = 0; if (len > plen) { j = (len + 1) / 2 - (plen + 1) / 2; len = plen; } for (i=0; i++ < len; ptr++) { if (ISALPHA(*ptr)) byte |= (((*ptr & 15) + 9) & 15) << 4; else byte |= (*ptr & 15) << 4; if (i & 1) byte >>= 4; else { char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } } if (len & 1) { char c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; goto grow; } break; case 'H': /* hex string (high nibble first) */ { int byte = 0; long i, j = 0; if (len > plen) { j = (len + 1) / 2 - (plen + 1) / 2; len = plen; } for (i=0; i++ < len; ptr++) { if (ISALPHA(*ptr)) byte |= ((*ptr & 15) + 9) & 15; else byte |= *ptr & 15; if (i & 1) byte <<= 4; else { char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } } if (len & 1) { char c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; goto grow; } break; } break; case 'c': /* signed char */ case 'C': /* unsigned char */ integer_size = 1; bigendian_p = BIGENDIAN_P(); /* not effective */ goto pack_integer; case 's': /* s for int16_t, s! for signed short */ case 'S': /* S for uint16_t, S! for unsigned short */ integer_size = NATINT_LEN(short, 2); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'i': /* i and i! for signed int */ case 'I': /* I and I! for unsigned int */ integer_size = (int)sizeof(int); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'l': /* l for int32_t, l! for signed long */ case 'L': /* L for uint32_t, L! for unsigned long */ integer_size = NATINT_LEN(long, 4); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'q': /* q for int64_t, q! for signed long long */ case 'Q': /* Q for uint64_t, Q! for unsigned long long */ integer_size = NATINT_LEN_Q; bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'j': /* j for intptr_t */ integer_size = sizeof(intptr_t); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'J': /* J for uintptr_t */ integer_size = sizeof(uintptr_t); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'n': /* 16 bit (2 bytes) integer (network byte-order) */ integer_size = 2; bigendian_p = 1; goto pack_integer; case 'N': /* 32 bit (4 bytes) integer (network byte-order) */ integer_size = 4; bigendian_p = 1; goto pack_integer; case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */ integer_size = 2; bigendian_p = 0; goto pack_integer; case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */ integer_size = 4; bigendian_p = 0; goto pack_integer; pack_integer: if (explicit_endian) { bigendian_p = explicit_endian == '>'; } if (integer_size > MAX_INTEGER_PACK_SIZE) rb_bug("unexpected integer size for pack: %d", integer_size); while (len-- > 0) { char intbuf[MAX_INTEGER_PACK_SIZE]; from = NEXTFROM; rb_integer_pack(from, intbuf, integer_size, 1, 0, INTEGER_PACK_2COMP | (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN)); rb_str_buf_cat(res, intbuf, integer_size); } break; case 'f': /* single precision float in native format */ case 'F': /* ditto */ while (len-- > 0) { float f; from = NEXTFROM; f = VALUE_to_float(from); rb_str_buf_cat(res, (char*)&f, sizeof(float)); } break; case 'e': /* single precision float in VAX byte-order */ while (len-- > 0) { FLOAT_CONVWITH(tmp); from = NEXTFROM; tmp.f = VALUE_to_float(from); HTOVF(tmp); rb_str_buf_cat(res, tmp.buf, sizeof(float)); } break; case 'E': /* double precision float in VAX byte-order */ while (len-- > 0) { DOUBLE_CONVWITH(tmp); from = NEXTFROM; tmp.d = RFLOAT_VALUE(rb_to_float(from)); HTOVD(tmp); rb_str_buf_cat(res, tmp.buf, sizeof(double)); } break; case 'd': /* double precision float in native format */ case 'D': /* ditto */ while (len-- > 0) { double d; from = NEXTFROM; d = RFLOAT_VALUE(rb_to_float(from)); rb_str_buf_cat(res, (char*)&d, sizeof(double)); } break; case 'g': /* single precision float in network byte-order */ while (len-- > 0) { FLOAT_CONVWITH(tmp); from = NEXTFROM; tmp.f = VALUE_to_float(from); HTONF(tmp); rb_str_buf_cat(res, tmp.buf, sizeof(float)); } break; case 'G': /* double precision float in network byte-order */ while (len-- > 0) { DOUBLE_CONVWITH(tmp); from = NEXTFROM; tmp.d = RFLOAT_VALUE(rb_to_float(from)); HTOND(tmp); rb_str_buf_cat(res, tmp.buf, sizeof(double)); } break; case 'x': /* null byte */ grow: rb_str_modify_expand(res, len); str_expand_fill(res, '\0', len); break; case 'X': /* back up byte */ shrink: plen = RSTRING_LEN(res); if (plen < len) rb_raise(rb_eArgError, "X outside of string"); rb_str_set_len(res, plen - len); break; case '@': /* null fill to absolute position */ len -= RSTRING_LEN(res); if (len > 0) goto grow; len = -len; if (len > 0) goto shrink; break; case '%': rb_raise(rb_eArgError, "%% is not supported"); break; case 'U': /* Unicode character */ while (len-- > 0) { SIGNED_VALUE l; char buf[8]; int le; from = NEXTFROM; from = rb_to_int(from); l = NUM2LONG(from); if (l < 0) { rb_raise(rb_eRangeError, "pack(U): value out of range"); } le = rb_uv_to_utf8(buf, l); rb_str_buf_cat(res, (char*)buf, le); } break; case 'u': /* uuencoded string */ case 'm': /* base64 encoded string */ from = NEXTFROM; StringValue(from); ptr = RSTRING_PTR(from); plen = RSTRING_LEN(from); if (len == 0 && type == 'm') { encodes(res, ptr, plen, type, 0); ptr += plen; break; } if (len <= 2) len = 45; else if (len > 63 && type == 'u') len = 63; else len = len / 3 * 3; while (plen > 0) { long todo; if (plen > len) todo = len; else todo = plen; encodes(res, ptr, todo, type, 1); plen -= todo; ptr += todo; } break; case 'M': /* quoted-printable encoded string */ from = rb_obj_as_string(NEXTFROM); if (len <= 1) len = 72; qpencode(res, from, len); break; case 'P': /* pointer to packed byte string */ from = THISFROM; if (!NIL_P(from)) { StringValue(from); if (RSTRING_LEN(from) < len) { rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", RSTRING_LEN(from), len); } } len = 1; /* FALL THROUGH */ case 'p': /* pointer to string */ while (len-- > 0) { char *t; from = NEXTFROM; if (NIL_P(from)) { t = 0; } else { t = StringValuePtr(from); } if (!associates) { associates = rb_ary_new(); } rb_ary_push(associates, from); rb_str_buf_cat(res, (char*)&t, sizeof(char*)); } break; case 'w': /* BER compressed integer */ while (len-- > 0) { VALUE buf = rb_str_new(0, 0); size_t numbytes; int sign; char *cp; from = NEXTFROM; from = rb_to_int(from); numbytes = rb_absint_numwords(from, 7, NULL); if (numbytes == 0) numbytes = 1; buf = rb_str_new(NULL, numbytes); sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN); if (sign < 0) rb_raise(rb_eArgError, "can't compress negative numbers"); if (sign == 2) rb_bug("buffer size problem?"); cp = RSTRING_PTR(buf); while (1 < numbytes) { *cp |= 0x80; cp++; numbytes--; } rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); } break; default: { unknown_directive("pack", type, fmt); break; } } } if (associates) { str_associate(res, associates); } switch (enc_info) { case 1: ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); break; case 2: rb_enc_set_index(res, rb_utf8_encindex()); break; default: /* do nothing, keep ASCII-8BIT */ break; } return res; } VALUE rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) { return pack_pack(ec, ary, fmt, buffer); } static const char uu_table[] = "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; static const char b64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static void encodes(VALUE str, const char *s0, long len, int type, int tail_lf) { enum {buff_size = 4096, encoded_unit = 4, input_unit = 3}; char buff[buff_size + 1]; /* +1 for tail_lf */ long i = 0; const char *const trans = type == 'u' ? uu_table : b64_table; char padding; const unsigned char *s = (const unsigned char *)s0; if (type == 'u') { buff[i++] = (char)len + ' '; padding = '`'; } else { padding = '='; } while (len >= input_unit) { while (len >= input_unit && buff_size-i >= encoded_unit) { buff[i++] = trans[077 & (*s >> 2)]; buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; buff[i++] = trans[077 & s[2]]; s += input_unit; len -= input_unit; } if (buff_size-i < encoded_unit) { rb_str_buf_cat(str, buff, i); i = 0; } } if (len == 2) { buff[i++] = trans[077 & (*s >> 2)]; buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; buff[i++] = padding; } else if (len == 1) { buff[i++] = trans[077 & (*s >> 2)]; buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; buff[i++] = padding; buff[i++] = padding; } if (tail_lf) buff[i++] = '\n'; rb_str_buf_cat(str, buff, i); if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun"); } static const char hex_table[] = "0123456789ABCDEF"; static void qpencode(VALUE str, VALUE from, long len) { char buff[1024]; long i = 0, n = 0, prev = EOF; unsigned char *s = (unsigned char*)RSTRING_PTR(from); unsigned char *send = s + RSTRING_LEN(from); while (s < send) { if ((*s > 126) || (*s < 32 && *s != '\n' && *s != '\t') || (*s == '=')) { buff[i++] = '='; buff[i++] = hex_table[*s >> 4]; buff[i++] = hex_table[*s & 0x0f]; n += 3; prev = EOF; } else if (*s == '\n') { if (prev == ' ' || prev == '\t') { buff[i++] = '='; buff[i++] = *s; } buff[i++] = *s; n = 0; prev = *s; } else { buff[i++] = *s; n++; prev = *s; } if (n > len) { buff[i++] = '='; buff[i++] = '\n'; n = 0; prev = '\n'; } if (i > 1024 - 5) { rb_str_buf_cat(str, buff, i); i = 0; } s++; } if (n > 0) { buff[i++] = '='; buff[i++] = '\n'; } if (i > 0) { rb_str_buf_cat(str, buff, i); } } static inline int hex2num(char c) { int n; n = ruby_digit36_to_number_table[(unsigned char)c]; if (16 <= n) n = -1; return n; } #define PACK_LENGTH_ADJUST_SIZE(sz) do { \ tmp_len = 0; \ if (len > (long)((send-s)/(sz))) { \ if (!star) { \ tmp_len = len-(send-s)/(sz); \ } \ len = (send-s)/(sz); \ } \ } while (0) #define PACK_ITEM_ADJUST() do { \ if (tmp_len > 0 && mode == UNPACK_ARRAY) \ rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ } while (0) /* Workaround for Oracle Developer Studio (Oracle Solaris Studio) * 12.4/12.5/12.6 C compiler optimization bug * with "-xO4" optimization option. */ #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150 # define AVOID_CC_BUG volatile #else # define AVOID_CC_BUG #endif enum unpack_mode { UNPACK_ARRAY, UNPACK_BLOCK, UNPACK_1 }; static VALUE pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset) { #define hexdigits ruby_hexdigits char *s, *send; char *p, *pend; VALUE ary, associates = Qfalse; long len; AVOID_CC_BUG long tmp_len; int signed_p, integer_size, bigendian_p; #define UNPACK_PUSH(item) do {\ VALUE item_val = (item);\ if ((mode) == UNPACK_BLOCK) {\ rb_yield(item_val);\ }\ else if ((mode) == UNPACK_ARRAY) {\ rb_ary_push(ary, item_val);\ }\ else /* if ((mode) == UNPACK_1) { */ {\ return item_val; \ }\ } while (0) StringValue(str); StringValue(fmt); rb_must_asciicompat(fmt); if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative"); len = RSTRING_LEN(str); if (offset > len) rb_raise(rb_eArgError, "offset outside of string"); s = RSTRING_PTR(str); send = s + len; s += offset; p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); #define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type)) ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil; while (p < pend) { int explicit_endian = 0; const char type = *p++; #ifdef NATINT_PACK int natint = 0; /* native integer */ #endif int star = 0; if (skip_blank(p, type)) continue; p = pack_modifiers(p, type, &natint, &explicit_endian); if (p >= pend) len = 1; else if (*p == '*') { star = 1; len = send - s; p++; } else if (ISDIGIT(*p)) { errno = 0; len = STRTOUL(p, (char**)&p, 10); if (len < 0 || errno) { rb_raise(rb_eRangeError, "pack length too big"); } } else { len = (type != '@'); } switch (type) { case '%': rb_raise(rb_eArgError, "%% is not supported"); break; case 'A': if (len > send - s) len = send - s; { long end = len; char *t = s + len - 1; while (t >= s) { if (*t != ' ' && *t != '\0') break; t--; len--; } UNPACK_PUSH(rb_str_new(s, len)); s += end; } break; case 'Z': { char *t = s; if (len > send-s) len = send-s; while (t < s+len && *t) t++; UNPACK_PUSH(rb_str_new(s, t-s)); if (t < send) t++; s = star ? t : s+len; } break; case 'a': if (len > send - s) len = send - s; UNPACK_PUSH(rb_str_new(s, len)); s += len; break; case 'b': { VALUE bitstr; char *t; int bits; long i; if (p[-1] == '*' || len > (send - s) * 8) len = (send - s) * 8; bits = 0; bitstr = rb_usascii_str_new(0, len); t = RSTRING_PTR(bitstr); for (i=0; i>= 1; else bits = (unsigned char)*s++; *t++ = (bits & 1) ? '1' : '0'; } UNPACK_PUSH(bitstr); } break; case 'B': { VALUE bitstr; char *t; int bits; long i; if (p[-1] == '*' || len > (send - s) * 8) len = (send - s) * 8; bits = 0; bitstr = rb_usascii_str_new(0, len); t = RSTRING_PTR(bitstr); for (i=0; i (send - s) * 2) len = (send - s) * 2; bits = 0; bitstr = rb_usascii_str_new(0, len); t = RSTRING_PTR(bitstr); for (i=0; i>= 4; else bits = (unsigned char)*s++; *t++ = hexdigits[bits & 15]; } UNPACK_PUSH(bitstr); } break; case 'H': { VALUE bitstr; char *t; int bits; long i; if (p[-1] == '*' || len > (send - s) * 2) len = (send - s) * 2; bits = 0; bitstr = rb_usascii_str_new(0, len); t = RSTRING_PTR(bitstr); for (i=0; i> 4) & 15]; } UNPACK_PUSH(bitstr); } break; case 'c': signed_p = 1; integer_size = 1; bigendian_p = BIGENDIAN_P(); /* not effective */ goto unpack_integer; case 'C': signed_p = 0; integer_size = 1; bigendian_p = BIGENDIAN_P(); /* not effective */ goto unpack_integer; case 's': signed_p = 1; integer_size = NATINT_LEN(short, 2); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'S': signed_p = 0; integer_size = NATINT_LEN(short, 2); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'i': signed_p = 1; integer_size = (int)sizeof(int); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'I': signed_p = 0; integer_size = (int)sizeof(int); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'l': signed_p = 1; integer_size = NATINT_LEN(long, 4); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'L': signed_p = 0; integer_size = NATINT_LEN(long, 4); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'q': signed_p = 1; integer_size = NATINT_LEN_Q; bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'Q': signed_p = 0; integer_size = NATINT_LEN_Q; bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'j': signed_p = 1; integer_size = sizeof(intptr_t); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'J': signed_p = 0; integer_size = sizeof(uintptr_t); bigendian_p = BIGENDIAN_P(); goto unpack_integer; case 'n': signed_p = 0; integer_size = 2; bigendian_p = 1; goto unpack_integer; case 'N': signed_p = 0; integer_size = 4; bigendian_p = 1; goto unpack_integer; case 'v': signed_p = 0; integer_size = 2; bigendian_p = 0; goto unpack_integer; case 'V': signed_p = 0; integer_size = 4; bigendian_p = 0; goto unpack_integer; unpack_integer: if (explicit_endian) { bigendian_p = explicit_endian == '>'; } PACK_LENGTH_ADJUST_SIZE(integer_size); while (len-- > 0) { int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN; VALUE val; if (signed_p) flags |= INTEGER_PACK_2COMP; val = rb_integer_unpack(s, integer_size, 1, 0, flags); UNPACK_PUSH(val); s += integer_size; } PACK_ITEM_ADJUST(); break; case 'f': case 'F': PACK_LENGTH_ADJUST_SIZE(sizeof(float)); while (len-- > 0) { float tmp; UNPACK_FETCH(&tmp, float); UNPACK_PUSH(DBL2NUM((double)tmp)); } PACK_ITEM_ADJUST(); break; case 'e': PACK_LENGTH_ADJUST_SIZE(sizeof(float)); while (len-- > 0) { FLOAT_CONVWITH(tmp); UNPACK_FETCH(tmp.buf, float); VTOHF(tmp); UNPACK_PUSH(DBL2NUM(tmp.f)); } PACK_ITEM_ADJUST(); break; case 'E': PACK_LENGTH_ADJUST_SIZE(sizeof(double)); while (len-- > 0) { DOUBLE_CONVWITH(tmp); UNPACK_FETCH(tmp.buf, double); VTOHD(tmp); UNPACK_PUSH(DBL2NUM(tmp.d)); } PACK_ITEM_ADJUST(); break; case 'D': case 'd': PACK_LENGTH_ADJUST_SIZE(sizeof(double)); while (len-- > 0) { double tmp; UNPACK_FETCH(&tmp, double); UNPACK_PUSH(DBL2NUM(tmp)); } PACK_ITEM_ADJUST(); break; case 'g': PACK_LENGTH_ADJUST_SIZE(sizeof(float)); while (len-- > 0) { FLOAT_CONVWITH(tmp); UNPACK_FETCH(tmp.buf, float); NTOHF(tmp); UNPACK_PUSH(DBL2NUM(tmp.f)); } PACK_ITEM_ADJUST(); break; case 'G': PACK_LENGTH_ADJUST_SIZE(sizeof(double)); while (len-- > 0) { DOUBLE_CONVWITH(tmp); UNPACK_FETCH(tmp.buf, double); NTOHD(tmp); UNPACK_PUSH(DBL2NUM(tmp.d)); } PACK_ITEM_ADJUST(); break; case 'U': if (len > send - s) len = send - s; while (len > 0 && s < send) { long alen = send - s; unsigned long l; l = utf8_to_uv(s, &alen); s += alen; len--; UNPACK_PUSH(ULONG2NUM(l)); } break; case 'u': { VALUE buf = rb_str_new(0, (send - s)*3/4); char *ptr = RSTRING_PTR(buf); long total = 0; while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') { long a,b,c,d; char hunk[3]; len = ((unsigned char)*s++ - ' ') & 077; total += len; if (total > RSTRING_LEN(buf)) { len -= total - RSTRING_LEN(buf); total = RSTRING_LEN(buf); } while (len > 0) { long mlen = len > 3 ? 3 : len; if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') a = ((unsigned char)*s++ - ' ') & 077; else a = 0; if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') b = ((unsigned char)*s++ - ' ') & 077; else b = 0; if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') c = ((unsigned char)*s++ - ' ') & 077; else c = 0; if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') d = ((unsigned char)*s++ - ' ') & 077; else d = 0; hunk[0] = (char)(a << 2 | b >> 4); hunk[1] = (char)(b << 4 | c >> 2); hunk[2] = (char)(c << 6 | d); memcpy(ptr, hunk, mlen); ptr += mlen; len -= mlen; } if (s < send && (unsigned char)*s != '\r' && *s != '\n') s++; /* possible checksum byte */ if (s < send && *s == '\r') s++; if (s < send && *s == '\n') s++; } rb_str_set_len(buf, total); UNPACK_PUSH(buf); } break; case 'm': { VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */ char *ptr = RSTRING_PTR(buf); int a = -1,b = -1,c = 0,d = 0; static signed char b64_xtable[256]; if (b64_xtable['/'] <= 0) { int i; for (i = 0; i < 256; i++) { b64_xtable[i] = -1; } for (i = 0; i < 64; i++) { b64_xtable[(unsigned char)b64_table[i]] = (char)i; } } if (len == 0) { while (s < send) { a = b = c = d = -1; a = b64_xtable[(unsigned char)*s++]; if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); b = b64_xtable[(unsigned char)*s++]; if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); if (*s == '=') { if (s + 2 == send && *(s + 1) == '=') break; rb_raise(rb_eArgError, "invalid base64"); } c = b64_xtable[(unsigned char)*s++]; if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); if (s + 1 == send && *s == '=') break; d = b64_xtable[(unsigned char)*s++]; if (d == -1) rb_raise(rb_eArgError, "invalid base64"); *ptr++ = castchar(a << 2 | b >> 4); *ptr++ = castchar(b << 4 | c >> 2); *ptr++ = castchar(c << 6 | d); } if (c == -1) { *ptr++ = castchar(a << 2 | b >> 4); if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); } else if (d == -1) { *ptr++ = castchar(a << 2 | b >> 4); *ptr++ = castchar(b << 4 | c >> 2); if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); } } else { while (s < send) { a = b = c = d = -1; while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} if (s >= send) break; s++; while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} if (s >= send) break; s++; while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} if (*s == '=' || s >= send) break; s++; while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} if (*s == '=' || s >= send) break; s++; *ptr++ = castchar(a << 2 | b >> 4); *ptr++ = castchar(b << 4 | c >> 2); *ptr++ = castchar(c << 6 | d); a = -1; } if (a != -1 && b != -1) { if (c == -1) *ptr++ = castchar(a << 2 | b >> 4); else { *ptr++ = castchar(a << 2 | b >> 4); *ptr++ = castchar(b << 4 | c >> 2); } } } rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); UNPACK_PUSH(buf); } break; case 'M': { VALUE buf = rb_str_new(0, send - s); char *ptr = RSTRING_PTR(buf), *ss = s; int csum = 0; int c1, c2; while (s < send) { if (*s == '=') { if (++s == send) break; if (s+1 < send && *s == '\r' && *(s+1) == '\n') s++; if (*s != '\n') { if ((c1 = hex2num(*s)) == -1) break; if (++s == send) break; if ((c2 = hex2num(*s)) == -1) break; csum |= *ptr++ = castchar(c1 << 4 | c2); } } else { csum |= *ptr++ = *s; } s++; ss = s; } rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); rb_str_buf_cat(buf, ss, send-ss); csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum); UNPACK_PUSH(buf); } break; case '@': if (len > RSTRING_LEN(str)) rb_raise(rb_eArgError, "@ outside of string"); s = RSTRING_PTR(str) + len; break; case 'X': if (len > s - RSTRING_PTR(str)) rb_raise(rb_eArgError, "X outside of string"); s -= len; break; case 'x': if (len > send - s) rb_raise(rb_eArgError, "x outside of string"); s += len; break; case 'P': if (sizeof(char *) <= (size_t)(send - s)) { VALUE tmp = Qnil; char *t; UNPACK_FETCH(&t, char *); if (t) { if (!associates) associates = str_associated(str); tmp = associated_pointer(associates, t); if (len < RSTRING_LEN(tmp)) { tmp = rb_str_new(t, len); str_associate(tmp, associates); } } UNPACK_PUSH(tmp); } break; case 'p': if (len > (long)((send - s) / sizeof(char *))) len = (send - s) / sizeof(char *); while (len-- > 0) { if ((size_t)(send - s) < sizeof(char *)) break; else { VALUE tmp = Qnil; char *t; UNPACK_FETCH(&t, char *); if (t) { if (!associates) associates = str_associated(str); tmp = associated_pointer(associates, t); } UNPACK_PUSH(tmp); } } break; case 'w': { char *s0 = s; while (len > 0 && s < send) { if (*s & 0x80) { s++; } else { s++; UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN)); len--; s0 = s; } } } break; default: unknown_directive("unpack", type, fmt); break; } } return ary; } static VALUE pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) { enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset)); } static VALUE pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) { return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset)); } int rb_uv_to_utf8(char buf[6], unsigned long uv) { if (uv <= 0x7f) { buf[0] = (char)uv; return 1; } if (uv <= 0x7ff) { buf[0] = castchar(((uv>>6)&0xff)|0xc0); buf[1] = castchar((uv&0x3f)|0x80); return 2; } if (uv <= 0xffff) { buf[0] = castchar(((uv>>12)&0xff)|0xe0); buf[1] = castchar(((uv>>6)&0x3f)|0x80); buf[2] = castchar((uv&0x3f)|0x80); return 3; } if (uv <= 0x1fffff) { buf[0] = castchar(((uv>>18)&0xff)|0xf0); buf[1] = castchar(((uv>>12)&0x3f)|0x80); buf[2] = castchar(((uv>>6)&0x3f)|0x80); buf[3] = castchar((uv&0x3f)|0x80); return 4; } if (uv <= 0x3ffffff) { buf[0] = castchar(((uv>>24)&0xff)|0xf8); buf[1] = castchar(((uv>>18)&0x3f)|0x80); buf[2] = castchar(((uv>>12)&0x3f)|0x80); buf[3] = castchar(((uv>>6)&0x3f)|0x80); buf[4] = castchar((uv&0x3f)|0x80); return 5; } if (uv <= 0x7fffffff) { buf[0] = castchar(((uv>>30)&0xff)|0xfc); buf[1] = castchar(((uv>>24)&0x3f)|0x80); buf[2] = castchar(((uv>>18)&0x3f)|0x80); buf[3] = castchar(((uv>>12)&0x3f)|0x80); buf[4] = castchar(((uv>>6)&0x3f)|0x80); buf[5] = castchar((uv&0x3f)|0x80); return 6; } rb_raise(rb_eRangeError, "pack(U): value out of range"); UNREACHABLE_RETURN(Qnil); } static const unsigned long utf8_limits[] = { 0x0, /* 1 */ 0x80, /* 2 */ 0x800, /* 3 */ 0x10000, /* 4 */ 0x200000, /* 5 */ 0x4000000, /* 6 */ 0x80000000, /* 7 */ }; static unsigned long utf8_to_uv(const char *p, long *lenp) { int c = *p++ & 0xff; unsigned long uv = c; long n; if (!(uv & 0x80)) { *lenp = 1; return uv; } if (!(uv & 0x40)) { *lenp = 1; rb_raise(rb_eArgError, "malformed UTF-8 character"); } if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } else { *lenp = 1; rb_raise(rb_eArgError, "malformed UTF-8 character"); } if (n > *lenp) { rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", n, *lenp); } *lenp = n--; if (n != 0) { while (n--) { c = *p++ & 0xff; if ((c & 0xc0) != 0x80) { *lenp -= n + 1; rb_raise(rb_eArgError, "malformed UTF-8 character"); } else { c &= 0x3f; uv = uv << 6 | c; } } } n = *lenp - 1; if (uv < utf8_limits[n]) { rb_raise(rb_eArgError, "redundant UTF-8 sequence"); } return uv; } #include "pack.rbinc" void Init_pack(void) { id_associated = rb_make_internal_id(); }