emscripten/third_party/mini-lz4.js

/*
MiniLZ4: Minimal LZ4 block decoding and encoding.
based off of node-lz4, https://github.com/pierrec/node-lz4
====
Copyright (c) 2012 Pierre Curto
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
====
changes have the same license
*/
var MiniLZ4 = (function() {
var exports = {};
/**
 * Decode a block. Assumptions: input contains all sequences of a
 * chunk, output is large enough to receive the decoded data.
 * If the output buffer is too small, an error will be thrown.
 * If the returned value is negative, an error occurred at the returned offset.
 *
 * @param {ArrayBufferView} input input data
 * @param {ArrayBufferView} output output data
 * @param {number=} sIdx
 * @param {number=} eIdx
 * @return {number} number of decoded bytes
 * @private
 */
exports.uncompress = function (input, output, sIdx, eIdx) {
  sIdx = sIdx || 0
  eIdx = eIdx || (input.length - sIdx)
  // Process each sequence in the incoming data
  for (var i = sIdx, n = eIdx, j = 0; i < n;) {
    var token = input[i++]
    // Literals
    var literals_length = (token >> 4)
    if (literals_length > 0) {
      // length of literals: a field value of 15 means the length continues in
      // the following bytes, each 255 byte meaning "add 255 and keep reading"
      var l = literals_length + 240
      while (l === 255) {
        l = input[i++]
        literals_length += l
      }
      // Copy the literals
      var end = i + literals_length
      while (i < end) output[j++] = input[i++]
      // End of buffer? The last sequence of a block is literals only
      if (i === n) return j
    }
    // Match copy
    // 2 bytes offset (little endian)
    var offset = input[i++] | (input[i++] << 8)
    // XXX 0 is an invalid offset value
    if (offset === 0) return j
    if (offset > j) return -(i-2)
    // length of match copy, with the same 255-continuation scheme as literals
    var match_length = (token & 0xf)
    var l = match_length + 240
    while (l === 255) {
      l = input[i++]
      match_length += l
    }
    // Copy the match (may overlap the bytes just written, which implements
    // run-length-style repetition)
    var pos = j - offset // position of the match copy in the current output
    var end = j + match_length + 4 // minmatch = 4
    while (j < end) output[j++] = output[pos++]
  }
  return j
}
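// Usage sketch for uncompress (illustrative, not executed here; compressedBytes
// and uncompressedSize are placeholder names). The caller must know the
// uncompressed size up front and preallocate the output buffer; the block
// format itself does not store it.
//
//   var output = new Uint8Array(uncompressedSize);        // known from metadata
//   var n = MiniLZ4.uncompress(compressedBytes, output);
//   if (n < 0) throw new Error('corrupt block at input offset ' + (-n));
//   // on success, output[0..n) holds the decoded bytes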
var
    maxInputSize = 0x7E000000
  , minMatch = 4
  // uint32() optimization
  , hashLog = 16
  , hashShift = (minMatch * 8) - hashLog
  , hashSize = 1 << hashLog
  , copyLength = 8
  , lastLiterals = 5
  , mfLimit = copyLength + minMatch
  , skipStrength = 6
  , mlBits = 4
  , mlMask = (1 << mlBits) - 1
  , runBits = 8 - mlBits
  , runMask = (1 << runBits) - 1
  , hasher = /* XXX uint32( */ 2654435761 /* ) */

// NB: assert() is assumed to be provided by the embedding environment
// (as it is in emscripten's tools, where this file is used)
assert(hashShift === 16);
var hashTable = new Int16Array(1<<16);
var empty = new Int16Array(hashTable.length);
// compressBound returns the maximum length of an lz4 block, given its uncompressed length
exports.compressBound = function (isize) {
  return isize > maxInputSize
    ? 0
    : (isize + (isize/255) + 16) | 0
}
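// For example, for the 2048-byte chunks used by compressPackage below:
//   compressBound(2048) === (2048 + 2048/255 + 16) | 0 === 2072
// i.e. at worst 24 bytes of overhead on an incompressible chunk.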
/** @param {number=} sIdx
    @param {number=} eIdx */
exports.compress = function (src, dst, sIdx, eIdx) {
  hashTable.set(empty); // reset the module-global hash table between blocks
  return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length)
}
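// Round-trip sketch (illustrative, not executed here): compress a buffer into
// a compressBound-sized destination, then decode it back.
//
//   var src = new Uint8Array(64);                           // all zeroes: highly compressible
//   var dst = new Uint8Array(MiniLZ4.compressBound(src.length));
//   var csize = MiniLZ4.compress(src, dst);                 // 0 means "could not compress"
//   var back = new Uint8Array(src.length);
//   var dsize = MiniLZ4.uncompress(dst.subarray(0, csize), back);
//   // dsize === src.length, and back now matches src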
function compressBlock (src, dst, pos, sIdx, eIdx) {
  // XXX var Hash = uint32() // Reusable unsigned 32 bits integer
  var dpos = sIdx
  var dlen = eIdx - sIdx
  var anchor = 0

  if (src.length >= maxInputSize) throw new Error("input too large")

  // Minimum of input bytes for compression (LZ4 specs)
  if (src.length > mfLimit) {
    var n = exports.compressBound(src.length)
    if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n)
    var
        step = 1
      , findMatchAttempts = (1 << skipStrength) + 3
      // Keep last few bytes incompressible (LZ4 specs):
      // last 5 bytes must be literals
      , srcLength = src.length - mfLimit

    while (pos + minMatch < srcLength) {
      // Find a match
      // min match of 4 bytes aka sequence
      var sequenceLowBits = src[pos+1]<<8 | src[pos]
      var sequenceHighBits = src[pos+3]<<8 | src[pos+2]
      // compute hash for the current sequence
      var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift;
      /* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits)
             .multiply(hasher)
             .shiftr(hashShift)
             .toNumber() */
      // get the position of the sequence matching the hash
      // NB. since 2 different sequences may have the same hash
      // it is double-checked below
      // do -1 to distinguish between initialized and uninitialized values
      var ref = hashTable[hash] - 1
      // save position of current sequence in hash table
      hashTable[hash] = pos + 1

      // no match if: first reference (ref < 0), or the candidate is more than
      // 64k back, or the hashed sequence differs from the current one
      if ( ref < 0 ||
        ((pos - ref) >>> 16) > 0 ||
        (
          ((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) ||
          ((src[ref+1]<<8 | src[ref]) != sequenceLowBits )
        )
      ) {
        // increase step if nothing found within limit
        step = findMatchAttempts++ >> skipStrength
        pos += step
        continue
      }

      findMatchAttempts = (1 << skipStrength) + 3

      // got a match
      var literals_length = pos - anchor
      var offset = pos - ref

      // minMatch already verified
      pos += minMatch
      ref += minMatch

      // move to the end of the match (>=minMatch)
      var match_length = pos
      while (pos < srcLength && src[pos] == src[ref]) {
        pos++
        ref++
      }

      // match length
      match_length = pos - match_length

      // token: low nibble holds the match length (saturated at mlMask)
      var token = match_length < mlMask ? match_length : mlMask

      // encode literals length into the token's high nibble
      if (literals_length >= runMask) {
        // high nibble saturated; the remainder follows in 255-continuation bytes
        dst[dpos++] = (runMask << mlBits) + token
        for (var len = literals_length - runMask; len > 254; len -= 255) {
          dst[dpos++] = 255
        }
        dst[dpos++] = len
      } else {
        dst[dpos++] = (literals_length << mlBits) + token
      }

      // write literals
      for (var i = 0; i < literals_length; i++) {
        dst[dpos++] = src[anchor+i]
      }

      // encode offset (2 bytes, little endian)
      dst[dpos++] = offset
      dst[dpos++] = (offset >> 8)

      // encode remaining match length, if the token's field was saturated
      if (match_length >= mlMask) {
        match_length -= mlMask
        while (match_length >= 255) {
          match_length -= 255
          dst[dpos++] = 255
        }
        dst[dpos++] = match_length
      }

      anchor = pos
    }
  }

  // cannot compress input
  if (anchor == 0) return 0

  // Write last literals
  // encode literals length (literals-only token, no match part)
  literals_length = src.length - anchor
  if (literals_length >= runMask) {
    dst[dpos++] = (runMask << mlBits)
    for (var ln = literals_length - runMask; ln > 254; ln -= 255) {
      dst[dpos++] = 255
    }
    dst[dpos++] = ln
  } else {
    dst[dpos++] = (literals_length << mlBits)
  }

  // write literals
  pos = anchor
  while (pos < src.length) {
    dst[dpos++] = src[pos++]
  }

  return dpos
}
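// Worked trace of the encoder above (illustrative; derived by stepping through
// this code). Compressing 32 zero bytes emits one sequence plus last literals:
//   0x1F        token: 1 literal, match-length field 15 ("extra byte follows")
//   0x00        the single literal
//   0x01 0x00   offset 1 (little endian): an overlapping, run-length-style match
//   0x00        extra match length 0, so the match copies 15 + 4 (minMatch) = 19 bytes
//   0xC0        token: 12 trailing literals, no match (the last bytes stay literal)
//   0x00 ...    the 12 trailing literal zeroes
// 18 output bytes in total for the 32-byte input.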
exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea
exports.compressPackage = function(data, verify) {
  if (verify) {
    var temp = new Uint8Array(exports.CHUNK_SIZE);
  }
  // compress the data in chunks
  assert(data instanceof ArrayBuffer);
  data = new Uint8Array(data);
  console.log('compressing package of size ' + data.length);
  var compressedChunks = [];
  var successes = [];
  var offset = 0;
  var total = 0;
  while (offset < data.length) {
    var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE);
    //console.log('compress a chunk ' + [offset, total, data.length]);
    offset += exports.CHUNK_SIZE;
    var bound = exports.compressBound(chunk.length);
    var compressed = new Uint8Array(bound);
    var compressedSize = exports.compress(chunk, compressed);
    if (compressedSize > 0) {
      assert(compressedSize <= bound);
      compressed = compressed.subarray(0, compressedSize);
      compressedChunks.push(compressed);
      total += compressedSize;
      successes.push(1);
      if (verify) {
        var back = exports.uncompress(compressed, temp);
        assert(back === chunk.length, [back, chunk.length]);
        for (var i = 0; i < chunk.length; i++) {
          assert(chunk[i] === temp[i]);
        }
      }
    } else {
      assert(compressedSize === 0);
      // failure to compress :( store the chunk uncompressed
      compressedChunks.push(chunk);
      total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size
      successes.push(0);
    }
  }
  data = null; // XXX null out pack['data'] too?
  var compressedData = {
    'data': new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunks, in one fast array
    'cachedOffset': total,
    'cachedIndexes': [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing
    'cachedChunks': [null, null],
    'offsets': [], // chunk# => start in compressed data
    'sizes': [],
    'successes': successes, // 1 if chunk is compressed
  };
  offset = 0;
  for (var i = 0; i < compressedChunks.length; i++) {
    compressedData['data'].set(compressedChunks[i], offset);
    compressedData['offsets'][i] = offset;
    compressedData['sizes'][i] = compressedChunks[i].length;
    offset += compressedChunks[i].length;
  }
  console.log('compressed package into ' + [compressedData['data'].length]);
  assert(offset === total);
  return compressedData;
};
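// Consumer sketch (illustrative, not executed here): reading chunk i back out
// of the object returned by compressPackage. A runtime consuming this structure
// is expected to use the cachedIndexes/cachedChunks slots to keep recently
// decompressed chunks around instead of re-decoding them.
//
//   var start = compressedData['offsets'][i];
//   var size = compressedData['sizes'][i];
//   var raw = compressedData['data'].subarray(start, start + size);
//   var out = new Uint8Array(MiniLZ4.CHUNK_SIZE);
//   if (compressedData['successes'][i]) {
//     MiniLZ4.uncompress(raw, out);  // returns the decoded size
//   } else {
//     out.set(raw);                  // chunk was stored uncompressed
//   }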
assert(exports.CHUNK_SIZE < (1 << 15)); // we use 16-bit ints as the type of the hash table, chunk size must be smaller
return exports;
})();