mozjpeg/jcphuff.c

1124 строки
33 KiB
C

/*
* jcphuff.c
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1995-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
* Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
* Copyright (C) 2021, Alex Richardson.
* Copyright (C) 2014, Mozilla Corporation.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
* This file contains Huffman entropy encoding routines for progressive JPEG.
*
* We do not support output suspension in this module, since the library
* currently does not allow multiple-scan files to be written with output
* suspension.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jsimd.h"
#include <limits.h>
#ifdef HAVE_INTRIN_H
#include <intrin.h>
#ifdef _MSC_VER
#ifdef HAVE_BITSCANFORWARD64
#pragma intrinsic(_BitScanForward64)
#endif
#ifdef HAVE_BITSCANFORWARD
#pragma intrinsic(_BitScanForward)
#endif
#endif
#endif
#ifdef C_PROGRESSIVE_SUPPORTED
/*
* NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
* used for bit counting rather than the lookup table. This will reduce the
* memory footprint by 64k, which is important for some mobile applications
* that create many isolated instances of libjpeg-turbo (web browsers, for
* instance.) This may improve performance on some mobile platforms as well.
* This feature is enabled by default only on Arm processors, because some x86
* chips have a slow implementation of bsr, and the use of clz/bsr cannot be
* shown to have a significant performance impact even on the x86 chips that
* have a fast implementation of it. When building for Armv6, you can
* explicitly disable the use of clz/bsr by adding -mthumb to the compiler
* flags (this defines __thumb__).
*/
/* NOTE: Both GCC and Clang define __GNUC__ */
#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
    defined(_M_ARM) || defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif

/*
 * JPEG_NBITS(x)          number of bits needed to represent x; yields 0 for
 *                        x == 0.
 * JPEG_NBITS_NONZERO(x)  same, but x must be nonzero when the clz intrinsics
 *                        are in use, because counting the leading zeroes of 0
 *                        is not defined for them.
 *
 * The argument may be expanded more than once, so it must be free of side
 * effects.
 */
#ifdef USE_CLZ_INTRINSIC
#if defined(_MSC_VER) && !defined(__clang__)
#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
#else
#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
#endif
/* The argument is parenthesized so that an expression argument cannot be
 * re-associated with the conditional operator.
 */
#define JPEG_NBITS(x)          ((x) ? JPEG_NBITS_NONZERO(x) : 0)
#else
#include "jpeg_nbits_table.h"
#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
#endif
/* Expanded entropy encoder object for progressive Huffman encoding.
 *
 * One instance is allocated by jinit_phuff_encoder() and installed as
 * cinfo->entropy.  start_pass_phuff() fills in the per-scan method pointers
 * and resets the coding state; the encode_mcu_* routines update that state
 * MCU by MCU.
 */
typedef struct {
struct jpeg_entropy_encoder pub; /* public fields */
/* Pointer to routine to prepare data for encode_mcu_AC_first()
 * (either the C fallback in this file or the SIMD version, chosen in
 * start_pass_phuff())
 */
void (*AC_first_prepare) (const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, UJCOEF *values, size_t *zerobits);
/* Pointer to routine to prepare data for encode_mcu_AC_refine()
 * (selected the same way); its return value is used by the caller as the
 * EOB position
 */
int (*AC_refine_prepare) (const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, UJCOEF *absvalues, size_t *bits);
/* Mode flag: TRUE for optimization (symbols are only counted),
 * FALSE for actual data output
 */
boolean gather_statistics;
/* Bit-level coding status.
 * next_output_byte/free_in_buffer are local copies of cinfo->dest fields;
 * each encode_mcu_* routine loads them on entry and writes them back on exit.
 */
JOCTET *next_output_byte; /* => next byte to write in buffer */
size_t free_in_buffer; /* # of byte spaces remaining in buffer */
size_t put_buffer; /* current bit-accumulation buffer */
int put_bits; /* # of bits now in it */
j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */
/* Coding status for DC components */
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
/* Coding status for AC components */
int ac_tbl_no; /* the table number of the single component */
unsigned int EOBRUN; /* run length of EOBs */
unsigned int BE; /* # of buffered correction bits before MCU */
char *bit_buffer; /* buffer for correction bits (1 per char);
                   * allocated lazily, MAX_CORR_BITS entries */
/* packing correction bits tightly would save some space but cost time... */
unsigned int restarts_to_go; /* MCUs left in this restart interval */
int next_restart_num; /* next restart number to write (0-7) */
/* Pointers to derived tables (these workspaces have image lifespan).
 * Since any one scan codes only DC or only AC, we only need one set
 * of tables, not one for DC and one for AC.
 */
c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
/* Statistics tables for optimization; again, one set is enough.
 * Each non-NULL entry points to 257 counters (see start_pass_phuff()).
 */
long *count_ptrs[NUM_HUFF_TBLS];
} phuff_entropy_encoder;
typedef phuff_entropy_encoder *phuff_entropy_ptr;
/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
* buffer can hold. Larger sizes may slightly improve compression, but
* 1000 is already well into the realm of overkill.
* The minimum safe size is 64 bits.
*/
#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */
/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * When RIGHT_SHIFT_IS_UNSIGNED is defined, IRIGHT_SHIFT stores its operand in
 * the local variable ishift_temp, so every function that uses IRIGHT_SHIFT
 * must place ISHIFT_TEMPS among its local declarations.  For a negative
 * operand the shifted value is OR'ed with (~0) << (16-(shft)), which forces
 * every bit from position (16 - shft) upward to one.
 * NOTE(review): the constant 16 presumably relies on the operand fitting in
 * 16 bits (a sign-extended coefficient) -- confirm before reusing this macro
 * for wider values.
 *
 * Otherwise ISHIFT_TEMPS expands to nothing and IRIGHT_SHIFT is a plain >>.
 */
#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS int ishift_temp;
#define IRIGHT_SHIFT(x,shft) \
((ishift_temp = (x)) < 0 ? \
(ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
(ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x,shft) ((x) >> (shft))
#endif
/* Round v up to the next multiple of p.  p must be a power of two.
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. "a | b") are not re-associated with the macro's own operators.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
/* Forward declarations */
METHODDEF(boolean) encode_mcu_DC_first (j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(void) encode_mcu_AC_first_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
UJCOEF *values, size_t *zerobits);
METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(boolean) encode_mcu_DC_refine (j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(int) encode_mcu_AC_refine_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
UJCOEF *absvalues, size_t *bits);
METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(void) finish_pass_phuff (j_compress_ptr cinfo);
METHODDEF(void) finish_pass_gather_phuff (j_compress_ptr cinfo);
/* Strip the trailing zero bits from *x and report how many were removed.
 *
 * On return the lowest bit of *x is set.  The callers in this file only
 * invoke this routine with a nonzero *x; the portable fallback below would
 * not terminate for a zero value.
 */
INLINE
METHODDEF(int)
count_zeroes(size_t *x)
{
#if defined(HAVE_BUILTIN_CTZL)
  int shift = __builtin_ctzl(*x);

  *x >>= shift;
#elif defined(HAVE_BITSCANFORWARD64)
  unsigned long shift;

  _BitScanForward64(&shift, *x);
  *x >>= shift;
#elif defined(HAVE_BITSCANFORWARD)
  unsigned long shift;

  _BitScanForward(&shift, *x);
  *x >>= shift;
#else
  int shift;

  /* No intrinsic available: peel off one zero bit per iteration */
  for (shift = 0; (*x & 1) == 0; shift++)
    *x >>= 1;
#endif
  return (int)shift;
}
/*
 * Initialize for a Huffman-compressed scan using progressive JPEG.
 *
 * Picks the encode_mcu/finish_pass methods that match the scan type
 * (DC or AC band, first or refinement pass), sets up the per-component
 * Huffman tables or statistics counters, and resets all coding state.
 *
 * gather_statistics: TRUE to count symbols only (optimization pass),
 *                    FALSE to produce actual output.
 */

METHODDEF(void)
start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  boolean is_DC_band = (cinfo->Ss == 0);
  int ci;

  entropy->cinfo = cinfo;
  entropy->gather_statistics = gather_statistics;

  /* We assume jcmaster.c already validated the scan parameters. */

  /* Select execution routines */
  if (cinfo->Ah != 0) {
    /* Successive-approximation refinement scan */
    if (!is_DC_band) {
      entropy->pub.encode_mcu = encode_mcu_AC_refine;
      if (jsimd_can_encode_mcu_AC_refine_prepare())
        entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
      else
        entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
      /* AC refinement needs a correction bit buffer; it is allocated once
       * and then reused by later scans.
       */
      if (entropy->bit_buffer == NULL)
        entropy->bit_buffer = (char *)
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                      MAX_CORR_BITS * sizeof(char));
    } else {
      entropy->pub.encode_mcu = encode_mcu_DC_refine;
    }
  } else {
    /* Spectral-selection scan or first successive-approximation pass */
    if (!is_DC_band)
      entropy->pub.encode_mcu = encode_mcu_AC_first;
    else
      entropy->pub.encode_mcu = encode_mcu_DC_first;
    if (jsimd_can_encode_mcu_AC_first_prepare())
      entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
    else
      entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
  }

  if (gather_statistics)
    entropy->pub.finish_pass = finish_pass_gather_phuff;
  else
    entropy->pub.finish_pass = finish_pass_phuff;

  /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
   * for AC coefficients.
   */
  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
    jpeg_component_info *compptr = cinfo->cur_comp_info[ci];
    int tbl;

    /* Every component starts the scan with a DC prediction of 0 */
    entropy->last_dc_val[ci] = 0;

    /* Work out which Huffman table (if any) this component uses */
    if (!is_DC_band) {
      tbl = compptr->ac_tbl_no;
      entropy->ac_tbl_no = tbl;
    } else if (cinfo->Ah == 0) {
      tbl = compptr->dc_tbl_no;
    } else {
      continue;                 /* DC refinement needs no table */
    }

    if (!gather_statistics) {
      /* Compute derived values for Huffman table */
      /* We may do this more than once for a table, but it's not expensive */
      jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
                              &entropy->derived_tbls[tbl]);
      continue;
    }

    /* Statistics mode: the table index must be validated here */
    /* (make_c_derived_tbl does this in the other path) */
    if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
      ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);

    /* Allocate and zero the statistics tables */
    /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
    if (entropy->count_ptrs[tbl] == NULL)
      entropy->count_ptrs[tbl] = (long *)
        (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                    257 * sizeof(long));
    memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long));

    if (cinfo->master->trellis_passes) {
      /* When generating tables for trellis passes, make sure that all */
      /* codewords have an assigned length: seed every symbol whose high */
      /* nibble is 0..15 and whose low nibble is 0..11 with a count of 1. */
      int hi, lo;

      for (hi = 0; hi < 16; hi++) {
        for (lo = 0; lo < 12; lo++)
          entropy->count_ptrs[tbl][16 * hi + lo] = 1;
      }
    }
  }

  /* Initialize restart stuff */
  entropy->next_restart_num = 0;
  entropy->restarts_to_go = cinfo->restart_interval;

  /* Initialize bit buffer to empty */
  entropy->put_bits = 0;
  entropy->put_buffer = 0;

  /* Initialize AC stuff */
  entropy->BE = 0;
  entropy->EOBRUN = 0;
}
/* Outputting bytes to the file.
 * NB: these must be called only when actually outputting,
 * that is, entropy->gather_statistics == FALSE.
 */

/* Emit a byte: store val at entropy->next_output_byte, advance the pointer,
 * and call dump_buffer() once the local buffer has no free space left.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement and can safely be used as the body of an unbraced
 * if/else.  The entropy argument is evaluated more than once, so it must be
 * free of side effects.
 */
#define emit_byte(entropy, val)  do { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  if (--(entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
} while (0)
/* Hand the full output buffer to the destination manager and pick up the
 * fresh buffer position it reports.  Suspension is not supported in this
 * module, so a refusal from the destination manager is a fatal error.
 */
LOCAL(void)
dump_buffer(phuff_entropy_ptr entropy)
{
  struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
  boolean emptied = (*dest->empty_output_buffer) (entropy->cinfo);

  if (!emptied)
    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);

  /* After a successful buffer dump, refresh our local copies of the
   * destination's write position.
   */
  entropy->free_in_buffer = dest->free_in_buffer;
  entropy->next_output_byte = dest->next_output_byte;
}
/* Outputting bits to the file */

/* Only the right 24 bits of put_buffer are used; the valid bits are
 * left-justified in this part.  At most 16 bits can be passed to emit_bits
 * in one call, and we never retain more than 7 bits in put_buffer
 * between calls, so 24 bits are sufficient.
 */

/* Append the low `size` bits of `code` to the output bit stream.
 * Does nothing (beyond validating size) when only gathering statistics.
 * A size of 0 is reported as JERR_HUFF_MISSING_CODE.
 */
LOCAL(void)
emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
{
  /* This routine is heavily used, so it's worth coding tightly. */
  register size_t accum;
  register int pending;

  /* if size is 0, caller used an invalid Huffman table entry */
  if (size == 0)
    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);

  /* Statistics passes produce no output at all */
  if (entropy->gather_statistics)
    return;

  /* Drop any bits of code above `size`, then slide the new bits into place
   * just below the bits that are already waiting in the accumulator.
   */
  pending = entropy->put_bits + size;
  accum = (size_t)code & ((((size_t)1) << size) - 1);
  accum <<= 24 - pending;
  accum |= entropy->put_buffer;

  /* Write out every complete byte, stuffing a zero after each 0xFF */
  for (; pending >= 8; pending -= 8, accum <<= 8) {
    int c = (int)((accum >> 16) & 0xFF);

    emit_byte(entropy, c);
    if (c == 0xFF) {
      emit_byte(entropy, 0);
    }
  }

  /* Remember the leftover bits for the next call */
  entropy->put_bits = pending;
  entropy->put_buffer = accum;
}
/* Pad the output to a byte boundary and empty the bit accumulator. */
LOCAL(void)
flush_bits(phuff_entropy_ptr entropy)
{
  /* Seven 1-bits complete whatever partial byte is pending */
  emit_bits(entropy, 0x7F, 7);

  /* Discard what is left of the padding so the accumulator is empty */
  entropy->put_bits = 0;
  entropy->put_buffer = 0;
}
/*
 * Emit (or just count) a Huffman symbol.
 *
 * In statistics mode the symbol's counter in count_ptrs[tbl_no] is bumped;
 * otherwise its code and length are looked up in derived_tbls[tbl_no] and
 * written with emit_bits().
 */

LOCAL(void)
emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
{
  c_derived_tbl *tbl;

  if (entropy->gather_statistics) {
    entropy->count_ptrs[tbl_no][symbol]++;
    return;
  }

  tbl = entropy->derived_tbls[tbl_no];
  emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
}
/*
 * Emit bits from a correction bit buffer.
 *
 * bufstart holds one bit per char; nbits entries are written in order.
 * Nothing is emitted in statistics mode.
 */

LOCAL(void)
emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
                   unsigned int nbits)
{
  unsigned int i;

  if (entropy->gather_statistics)
    return;                     /* no real work */

  for (i = 0; i < nbits; i++)
    emit_bits(entropy, (unsigned int)bufstart[i], 1);
}
/*
 * Emit any pending EOBRUN symbol.
 *
 * Writes the EOBn symbol plus its extra bits, followed by the correction
 * bits buffered in entropy->bit_buffer, then clears EOBRUN and BE.
 * Does nothing when no EOB run is pending.
 */

LOCAL(void)
emit_eobrun(phuff_entropy_ptr entropy)
{
  register int run, nbits;

  if (entropy->EOBRUN == 0)
    return;                     /* nothing pending */

  run = entropy->EOBRUN;
  nbits = JPEG_NBITS_NONZERO(run) - 1;
  /* safety check: shouldn't happen given limited correction-bit buffer */
  if (nbits > 14)
    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);

  /* The symbol carries nbits in its high nibble; the low bits of the run
   * length follow as extra bits when there are any.
   */
  emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
  if (nbits != 0)
    emit_bits(entropy, entropy->EOBRUN, nbits);
  entropy->EOBRUN = 0;

  /* Emit any buffered correction bits */
  emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
  entropy->BE = 0;
}
/*
 * Emit a restart marker & resynchronize predictions.
 *
 * Any pending EOB run is flushed first.  The marker bytes themselves are
 * written only when real output is being produced; the coding state is
 * reset in either mode.
 */

LOCAL(void)
emit_restart(phuff_entropy_ptr entropy, int restart_num)
{
  int ci;

  emit_eobrun(entropy);

  if (!entropy->gather_statistics) {
    flush_bits(entropy);
    emit_byte(entropy, 0xFF);
    emit_byte(entropy, JPEG_RST0 + restart_num);
  }

  if (entropy->cinfo->Ss != 0) {
    /* AC scan: re-initialize all AC-related fields to 0 */
    entropy->EOBRUN = 0;
    entropy->BE = 0;
    return;
  }

  /* DC scan: re-initialize DC predictions to 0 */
  for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
    entropy->last_dc_val[ci] = 0;
}
/*
 * MCU encoding for DC initial scan (either spectral selection,
 * or first pass of successive approximation).
 *
 * For each block of the MCU, the point-transformed DC value is coded as a
 * difference from the previous DC value of the same component.
 * Always returns TRUE (output suspension is not supported).
 */

METHODDEF(boolean)
encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  int Al = cinfo->Al;
  int blkn;
  ISHIFT_TEMPS

  /* Work on local copies of the destination's write position */
  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval && entropy->restarts_to_go == 0)
    emit_restart(entropy, entropy->next_restart_num);

  /* Encode the MCU data blocks */
  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
    int ci = cinfo->MCU_membership[blkn];
    int dc, diff, sign, magnitude, extra, nbits;

    /* Compute the DC value after the required point transform by Al.
     * This is simply an arithmetic right shift.
     */
    dc = IRIGHT_SHIFT((int)(MCU_data[blkn][0][0]), Al);

    /* DC differences are figured on the point-transformed values. */
    diff = dc - entropy->last_dc_val[ci];
    entropy->last_dc_val[ci] = dc;

    /* Encode the DC coefficient difference per section G.1.2.1 */

    /* Branch-free absolute value: sign is all ones for a negative
     * difference and zero otherwise.
     */
    sign = diff >> (CHAR_BIT * sizeof(int) - 1);
    magnitude = (diff ^ sign) - sign;
    /* For a negative input, the extra bits are the bitwise complement of
     * the magnitude; for a non-negative input they are the value itself.
     */
    extra = magnitude ^ sign;

    /* Find the number of bits needed for the magnitude of the coefficient */
    nbits = JPEG_NBITS(magnitude);
    /* Check for out-of-range coefficient values.
     * Since we're encoding a difference, the range limit is twice as much.
     */
    if (nbits > MAX_COEF_BITS + 1)
      ERREXIT(cinfo, JERR_BAD_DCT_COEF);

    /* Count/emit the Huffman-coded symbol for the number of bits */
    emit_symbol(entropy, cinfo->cur_comp_info[ci]->dc_tbl_no, nbits);

    /* Follow it with the extra bits; emit_bits rejects calls with size 0 */
    if (nbits != 0)
      emit_bits(entropy, (unsigned int)extra, nbits);
  }

  /* Publish the updated write position */
  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num = (entropy->next_restart_num + 1) & 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
/*
 * Data preparation for encode_mcu_AC_first().
 */

/* Process `count` coefficients in the order given by
 * jpeg_natural_order_start.  For every coefficient that is still nonzero
 * after the point transform, store its magnitude in values[k], the bits to
 * emit in values[k + DCTSIZE2], and set bit k of zerobits.  Slots of
 * coefficients that are (or become) zero are not written.
 *
 * Relies on the enclosing function's k, coef, sign, zerobits, block,
 * jpeg_natural_order_start, Al and values.
 */
#define COMPUTE_ABSVALUES_AC_FIRST(count) { \
  for (k = 0; k < (count); k++) { \
    coef = block[jpeg_natural_order_start[k]]; \
    if (coef != 0) { \
      /* We must apply the point transform by Al.  For AC coefficients this \
       * is an integer division with rounding towards 0.  To do this \
       * portably in C, we shift after obtaining the absolute value. \
       */ \
      sign = coef >> (CHAR_BIT * sizeof(int) - 1); \
      coef = ((coef ^ sign) - sign) >> Al; \
      /* Watch out for case that nonzero coef is zero after point transform */ \
      if (coef != 0) { \
        values[k] = (UJCOEF)coef; \
        /* For a negative coef, emit the bitwise complement of abs(coef) */ \
        values[k + DCTSIZE2] = (UJCOEF)(sign ^ coef); \
        zerobits |= ((size_t)1U) << k; \
      } \
    } \
  } \
}

/* C implementation of the AC_first_prepare method.
 *
 * bits[0] receives the nonzero mask for the first coefficients; when size_t
 * is 4 bytes wide the mask is split, with bits[1] covering coefficients 32
 * and up.
 */
METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, UJCOEF *values, size_t *bits)
{
  register int k, coef, sign;
  size_t zerobits = 0U;
#if SIZEOF_SIZE_T == 4
  int first_count = (Sl > 32) ? 32 : Sl;
#else
  int first_count = Sl;
#endif

  COMPUTE_ABSVALUES_AC_FIRST(first_count);
  bits[0] = zerobits;

#if SIZEOF_SIZE_T == 4
  /* Second half: restart the mask and step every cursor past slot 31 */
  zerobits = 0U;
  if (Sl > 32) {
    jpeg_natural_order_start += 32;
    values += 32;
    COMPUTE_ABSVALUES_AC_FIRST(Sl - 32);
  }
  bits[1] = zerobits;
#endif
}
/*
 * MCU encoding for AC initial scan (either spectral selection,
 * or first pass of successive approximation).
 */

/* Main coding loop of encode_mcu_AC_first().
 *
 * Each set bit of zerobits marks a slot that the prepare routine filled:
 * cvalue[0] holds the magnitude and cvalue[DCTSIZE2] the bits to emit.
 * count_zeroes() strips the zero bits below the next set bit; the number of
 * bits stripped is the zero run length r, and cvalue is advanced by it.
 *
 * `label` is pasted right after that advance.  The first expansion passes a
 * dummy statement; the 32-bit continuation passes a real label so that it
 * can compute r and cvalue itself and then jump into the loop body with
 * goto.
 *
 * Uses the enclosing function's zerobits, r, cvalue, temp, temp2, nbits,
 * entropy and cinfo.
 */
#define ENCODE_COEFS_AC_FIRST(label) { \
while (zerobits) { \
r = count_zeroes(&zerobits); \
cvalue += r; \
label \
temp = cvalue[0]; \
temp2 = cvalue[DCTSIZE2]; \
\
/* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
while (r > 15) { \
emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
r -= 16; \
} \
\
/* Find the number of bits needed for the magnitude of the coefficient */ \
nbits = JPEG_NBITS_NONZERO(temp); /* there must be at least one 1 bit */ \
/* Check for out-of-range coefficient values */ \
if (nbits > MAX_COEF_BITS) \
ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
\
/* Count/emit Huffman symbol for run length / number of bits */ \
emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
\
/* Emit that number of bits of the value, if positive, */ \
/* or the complement of its magnitude, if negative. */ \
emit_bits(entropy, (unsigned int)temp2, nbits); \
\
cvalue++; \
zerobits >>= 1; \
} \
}
/* Encode the AC coefficients Ss..Se of the single block MCU_data[0][0].
 * Always returns TRUE (output suspension is not supported).
 */
METHODDEF(boolean)
encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
register int temp, temp2;
register int nbits, r;
int Sl = cinfo->Se - cinfo->Ss + 1; /* number of coefficients in the band */
int Al = cinfo->Al;
/* Room for magnitudes plus emit bits, with slack for 16-byte alignment */
UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
UJCOEF *values;
const UJCOEF *cvalue; /* cursor into values[] used by the coding loop */
size_t zerobits;
size_t bits[8 / SIZEOF_SIZE_T]; /* nonzero mask: 1 word, or 2 if 32-bit */
/* Work on local copies of the destination's write position */
entropy->next_output_byte = cinfo->dest->next_output_byte;
entropy->free_in_buffer = cinfo->dest->free_in_buffer;
/* Emit restart marker if needed */
if (cinfo->restart_interval)
if (entropy->restarts_to_go == 0)
emit_restart(entropy, entropy->next_restart_num);
#ifdef WITH_SIMD
cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);
#else
/* Not using SIMD, so alignment is not needed */
cvalue = values = values_unaligned;
#endif
/* Prepare data */
entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
Sl, Al, values, bits);
zerobits = bits[0];
#if SIZEOF_SIZE_T == 4
/* Combine both halves just to learn whether any coefficient is nonzero */
zerobits |= bits[1];
#endif
/* Emit any pending EOBRUN */
if (zerobits && (entropy->EOBRUN > 0))
emit_eobrun(entropy);
#if SIZEOF_SIZE_T == 4
/* Back to the first half only for the actual coding loop */
zerobits = bits[0];
#endif
/* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
ENCODE_COEFS_AC_FIRST((void)0;);
#if SIZEOF_SIZE_T == 4
zerobits = bits[1];
if (zerobits) {
/* diff = distance from the current cursor to slot 32; it is added to the
 * run length so that the zero run carries over from the first half.
 */
int diff = ((values + DCTSIZE2 / 2) - cvalue);
r = count_zeroes(&zerobits);
r += diff;
cvalue += r;
goto first_iter_ac_first;
}
ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
#endif
if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
entropy->EOBRUN++; /* count an EOB */
if (entropy->EOBRUN == 0x7FFF)
emit_eobrun(entropy); /* force it out to avoid overflow */
}
/* Publish the updated write position */
cinfo->dest->next_output_byte = entropy->next_output_byte;
cinfo->dest->free_in_buffer = entropy->free_in_buffer;
/* Update restart-interval state too */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0) {
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num++;
entropy->next_restart_num &= 7;
}
entropy->restarts_to_go--;
}
return TRUE;
}
/*
 * MCU encoding for DC successive approximation refinement scan.
 * Note: we assume such scans can be multi-component, although the spec
 * is not very clear on the point.
 *
 * One raw bit per block is written; no Huffman table is involved.
 * Always returns TRUE (output suspension is not supported).
 */

METHODDEF(boolean)
encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  int Al = cinfo->Al;
  int blkn;

  /* Work on local copies of the destination's write position */
  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval && entropy->restarts_to_go == 0)
    emit_restart(entropy, entropy->next_restart_num);

  /* Encode the MCU data blocks */
  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
    int dc = MCU_data[blkn][0][0];

    /* We simply emit the Al'th bit of the DC coefficient value;
     * emit_bits() masks off everything above that bit.
     */
    emit_bits(entropy, (unsigned int)(dc >> Al), 1);
  }

  /* Publish the updated write position */
  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num = (entropy->next_restart_num + 1) & 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
/*
 * Data preparation for encode_mcu_AC_refine().
 */

/* Pre-pass over `count` coefficients in the order given by
 * jpeg_natural_order_start.  Every slot absvalues[k] receives the
 * point-transformed absolute value.  For nonzero results, bit k of zerobits
 * is set and bit k of signbits is set when the coefficient is not negative.
 * EOB tracks (offset by koffset) the last slot whose transformed absolute
 * value is exactly 1, i.e. the last newly-nonzero coefficient.
 *
 * Relies on the enclosing function's k, coef, sign, zerobits, signbits, EOB,
 * block, jpeg_natural_order_start, Al and absvalues.
 */
#define COMPUTE_ABSVALUES_AC_REFINE(count, koffset) { \
  for (k = 0; k < (count); k++) { \
    coef = block[jpeg_natural_order_start[k]]; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value. \
     */ \
    sign = coef >> (CHAR_BIT * sizeof(int) - 1); \
    coef = ((coef ^ sign) - sign) >> Al; \
    absvalues[k] = (UJCOEF)coef;  /* save abs value for main pass */ \
    if (coef == 0) \
      continue; \
    zerobits |= ((size_t)1U) << k; \
    /* sign is -1 or 0 here, so sign + 1 is 0 (negative) or 1 */ \
    signbits |= ((size_t)(sign + 1)) << k; \
    if (coef == 1) \
      EOB = k + (koffset); \
  } \
}

/* C implementation of the AC_refine_prepare method.
 *
 * Layout of bits[]: with an 8-byte size_t, bits[0] is the nonzero mask and
 * bits[1] the sign mask.  Otherwise bits[0]/bits[1] are the nonzero masks
 * for slots 0-31 and 32-63, and bits[2]/bits[3] the matching sign masks.
 *
 * Returns the EOB index (0 when no coefficient has transformed magnitude 1).
 */
METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
                             int Al, UJCOEF *absvalues, size_t *bits)
{
  register int k, coef, sign;
  size_t zerobits = 0U, signbits = 0U;
  int EOB = 0;
#if SIZEOF_SIZE_T == 4
  int first_count = (Sl > 32) ? 32 : Sl;
#else
  int first_count = Sl;
#endif

  COMPUTE_ABSVALUES_AC_REFINE(first_count, 0);
  bits[0] = zerobits;

#if SIZEOF_SIZE_T == 8
  bits[1] = signbits;
#else
  bits[2] = signbits;

  /* Second half: restart both masks and step every cursor past slot 31 */
  zerobits = 0U;
  signbits = 0U;
  if (Sl > 32) {
    jpeg_natural_order_start += 32;
    absvalues += 32;
    COMPUTE_ABSVALUES_AC_REFINE(Sl - 32, 32);
  }
  bits[1] = zerobits;
  bits[3] = signbits;
#endif

  return EOB;
}
/*
 * MCU encoding for AC successive approximation refinement scan.
 */

/* Main coding loop of encode_mcu_AC_refine().
 *
 * Each set bit of zerobits marks a coefficient whose point-transformed
 * absolute value is nonzero; count_zeroes() strips the zero bits below it,
 * and the number stripped (idx) is added to the zero run r and used to
 * advance cabsvalue and signbits in step.
 *
 * A coefficient with absolute value > 1 was already nonzero in an earlier
 * pass: only its correction bit is appended to BR_buffer.  A coefficient
 * with absolute value 1 is newly nonzero: any pending EOBRUN is flushed,
 * then a run/size symbol, the sign bit and the buffered correction bits are
 * emitted.
 *
 * `label` is pasted right after the cursor advance so that the 32-bit
 * continuation can jump into the loop body with goto after computing the
 * advance itself.
 *
 * Uses the enclosing function's zerobits, signbits, idx, r, temp, cabsvalue,
 * EOBPTR, BR, BR_buffer and entropy.
 */
#define ENCODE_COEFS_AC_REFINE(label) { \
while (zerobits) { \
idx = count_zeroes(&zerobits); \
r += idx; \
cabsvalue += idx; \
signbits >>= idx; \
label \
/* Emit any required ZRLs, but not if they can be folded into EOB */ \
while (r > 15 && (cabsvalue <= EOBPTR)) { \
/* emit any pending EOBRUN and the BE correction bits */ \
emit_eobrun(entropy); \
/* Emit ZRL */ \
emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
r -= 16; \
/* Emit buffered correction bits that must be associated with ZRL */ \
emit_buffered_bits(entropy, BR_buffer, BR); \
BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
BR = 0; \
} \
\
temp = *cabsvalue++; \
\
/* If the coef was previously nonzero, it only needs a correction bit. \
* NOTE: a straight translation of the spec's figure G.7 would suggest \
* that we also need to test r > 15. But if r > 15, we can only get here \
* if k > EOB, which implies that this coefficient is not 1. \
*/ \
if (temp > 1) { \
/* The correction bit is the next bit of the absolute value. */ \
BR_buffer[BR++] = (char)(temp & 1); \
signbits >>= 1; \
zerobits >>= 1; \
continue; \
} \
\
/* Emit any pending EOBRUN and the BE correction bits */ \
emit_eobrun(entropy); \
\
/* Count/emit Huffman symbol for run length / number of bits */ \
emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
\
/* Emit output bit for newly-nonzero coef */ \
temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
emit_bits(entropy, (unsigned int)temp, 1); \
\
/* Emit buffered correction bits that must be associated with this code */ \
emit_buffered_bits(entropy, BR_buffer, BR); \
BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
BR = 0; \
r = 0; /* reset zero run length */ \
signbits >>= 1; \
zerobits >>= 1; \
} \
}
/* Encode the AC coefficients Ss..Se of the single block MCU_data[0][0] for a
 * refinement pass.  Always returns TRUE (output suspension is not supported).
 */
METHODDEF(boolean)
encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
register int temp, r, idx;
char *BR_buffer; /* where this MCU's correction bits are appended */
unsigned int BR; /* number of correction bits appended so far */
int Sl = cinfo->Se - cinfo->Ss + 1; /* number of coefficients in the band */
int Al = cinfo->Al;
/* Room for the absolute values, with slack for 16-byte alignment */
UJCOEF absvalues_unaligned[DCTSIZE2 + 15];
UJCOEF *absvalues;
const UJCOEF *cabsvalue, *EOBPTR; /* coding cursor; slot of the EOB index */
size_t zerobits, signbits;
/* Nonzero and sign masks: 2 words with an 8-byte size_t, 4 words otherwise */
size_t bits[16 / SIZEOF_SIZE_T];
/* Work on local copies of the destination's write position */
entropy->next_output_byte = cinfo->dest->next_output_byte;
entropy->free_in_buffer = cinfo->dest->free_in_buffer;
/* Emit restart marker if needed */
if (cinfo->restart_interval)
if (entropy->restarts_to_go == 0)
emit_restart(entropy, entropy->next_restart_num);
#ifdef WITH_SIMD
cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
#else
/* Not using SIMD, so alignment is not needed */
cabsvalue = absvalues = absvalues_unaligned;
#endif
/* Prepare data; the prepare routine's return value is the EOB index */
EOBPTR = absvalues +
entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
Sl, Al, absvalues, bits);
/* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
r = 0; /* r = run length of zeros */
BR = 0; /* BR = count of buffered bits added now */
BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
zerobits = bits[0];
#if SIZEOF_SIZE_T == 8
signbits = bits[1];
#else
signbits = bits[2];
#endif
ENCODE_COEFS_AC_REFINE((void)0;);
#if SIZEOF_SIZE_T == 4
/* Continue with the masks for slots 32 and up */
zerobits = bits[1];
signbits = bits[3];
if (zerobits) {
/* diff = distance from the current cursor to slot 32; it is added to the
 * run length and the cursor, but not to the sign-mask shift, because
 * signbits was just reloaded for the second half.
 */
int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
idx = count_zeroes(&zerobits);
signbits >>= idx;
idx += diff;
r += idx;
cabsvalue += idx;
goto first_iter_ac_refine;
}
ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
#endif
/* Make r nonzero whenever slots remain beyond the last visited coefficient */
r |= (int)((absvalues + Sl) - cabsvalue);
if (r > 0 || BR > 0) { /* If there are trailing zeroes, */
entropy->EOBRUN++; /* count an EOB */
entropy->BE += BR; /* concat my correction bits to older ones */
/* We force out the EOB if we risk either:
* 1. overflow of the EOB counter;
* 2. overflow of the correction bit buffer during the next MCU.
*/
if (entropy->EOBRUN == 0x7FFF ||
entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
emit_eobrun(entropy);
}
/* Publish the updated write position */
cinfo->dest->next_output_byte = entropy->next_output_byte;
cinfo->dest->free_in_buffer = entropy->free_in_buffer;
/* Update restart-interval state too */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0) {
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num++;
entropy->next_restart_num &= 7;
}
entropy->restarts_to_go--;
}
return TRUE;
}
/*
 * Finish up at the end of a Huffman-compressed progressive scan.
 *
 * Writes out the pending EOB run and pads the final byte.
 */

METHODDEF(void)
finish_pass_phuff(j_compress_ptr cinfo)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;

  /* Pick up the destination's current write position */
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
  entropy->next_output_byte = cinfo->dest->next_output_byte;

  /* Flush out any buffered data */
  emit_eobrun(entropy);
  flush_bits(entropy);

  /* Hand the final write position back to the destination manager */
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
  cinfo->dest->next_output_byte = entropy->next_output_byte;
}
/*
 * Finish up a statistics-gathering pass and create the new Huffman tables.
 *
 * For every table referenced by the scan, an optimal table is generated
 * from the counters collected in entropy->count_ptrs[].
 */

METHODDEF(void)
finish_pass_gather_phuff(j_compress_ptr cinfo)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  boolean is_DC_band;
  boolean did[NUM_HUFF_TBLS];
  int ci;

  /* Flush out buffered data (all we care about is counting the EOB symbol) */
  emit_eobrun(entropy);

  is_DC_band = (cinfo->Ss == 0);

  /* It's important not to apply jpeg_gen_optimal_table more than once
   * per table, because it clobbers the input frequency counts!
   */
  memset(did, 0, sizeof(did));

  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
    jpeg_component_info *compptr = cinfo->cur_comp_info[ci];
    JHUFF_TBL **htblptr;
    int tbl;

    if (!is_DC_band) {
      tbl = compptr->ac_tbl_no;
    } else if (cinfo->Ah == 0) {
      tbl = compptr->dc_tbl_no;
    } else {
      continue;                 /* DC refinement needs no table */
    }

    if (did[tbl])
      continue;                 /* this table was already regenerated */

    if (is_DC_band)
      htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
    else
      htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];

    /* Create the table object on first use, then fill it in */
    if (*htblptr == NULL)
      *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
    jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
    did[tbl] = TRUE;
  }
}
/*
 * Module initialization routine for progressive Huffman entropy encoding.
 *
 * Allocates the encoder object from the image pool, installs it as
 * cinfo->entropy and marks every lazily-allocated workspace as absent.
 */

GLOBAL(void)
jinit_phuff_encoder(j_compress_ptr cinfo)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                sizeof(phuff_entropy_encoder));
  int tblno;

  cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
  entropy->pub.start_pass = start_pass_phuff;

  /* Mark tables unallocated */
  for (tblno = 0; tblno < NUM_HUFF_TBLS; tblno++) {
    entropy->count_ptrs[tblno] = NULL;
    entropy->derived_tbls[tblno] = NULL;
  }

  /* The correction bit buffer is needed only in AC refinement scans and is
   * allocated on demand by start_pass_phuff().
   */
  entropy->bit_buffer = NULL;
}
#endif /* C_PROGRESSIVE_SUPPORTED */