Make sure the work space memory is properly aligned

We use the heap allocators to avoid having more than one implementation of the alignment logic. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@19 632fc199-4ca6-4c93-a231-07263d6284db
2009-03-09 13:29:37 +00:00 · 2009-03-09 13:29:37 +00:00 · 35c4719638
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@ -53,12 +53,16 @@ typedef struct {
   */
  DCTELEM * divisors[NUM_QUANT_TBLS];
  /* work area for FDCT subroutine */
  DCTELEM * workspace;
 #ifdef DCT_FLOAT_SUPPORTED
  /* Same as above for the floating-point case. */
  float_DCT_method_ptr float_dct;
  float_convsamp_method_ptr float_convsamp;
  float_quantize_method_ptr float_quantize;
  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  FAST_FLOAT * float_workspace;
 #endif
 } my_fdct_controller;
@ -403,10 +407,11 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
-  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  DCTELEM * workspace;
  JDIMENSION bi;
  /* Make sure the compiler doesn't look up these every pass */
  workspace = fdct->workspace;
  forward_DCT_method_ptr do_dct = fdct->dct;
  convsamp_method_ptr do_convsamp = fdct->convsamp;
  quantize_method_ptr do_quantize = fdct->quantize;
@ -492,10 +497,12 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
-  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  FAST_FLOAT * workspace;
  JDIMENSION bi;
  /* Make sure the compiler doesn't look up these every pass */
  workspace = fdct->float_workspace;
  float_DCT_method_ptr do_dct = fdct->float_dct;
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
@ -603,6 +610,18 @@ jinit_forward_dct (j_compress_ptr cinfo)
    break;
  }
  /* Allocate workspace memory */
 #ifdef DCT_FLOAT_SUPPORTED
  if (cinfo->dct_method == JDCT_FLOAT)
    fdct->float_workspace = (FAST_FLOAT *)
      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				  SIZEOF(FAST_FLOAT) * DCTSIZE2);
  else
 #endif
    fdct->workspace = (DCTELEM *)
      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				  SIZEOF(DCTELEM) * DCTSIZE2);
  /* Mark divisor tables unallocated */
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
    fdct->divisors[i] = NULL;
--- a/jdcoefct.c
+++ b/jdcoefct.c
@ -47,6 +47,9 @@ typedef struct {
   */
  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
  /* Temporary workspace for one MCU */
  JCOEF * workspace;
 #ifdef D_MULTISCAN_FILES_SUPPORTED
  /* In multi-pass modes, we need a virtual block array for each component. */
  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
@ -471,13 +474,16 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
  jpeg_component_info *compptr;
  inverse_DCT_method_ptr inverse_DCT;
  boolean first_row, last_row;
-  JBLOCK workspace;
+  JCOEF * workspace;
  int *coef_bits;
  JQUANT_TBL *quanttbl;
  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
  int Al, pred;
  /* Keep a local variable to avoid looking it up more than once */
  workspace = coef->workspace;
  /* Force some input to be done if we are getting ahead of the input. */
  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
 	 ! cinfo->inputctl->eoi_reached) {
@ -733,4 +739,9 @@ jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
    coef->pub.decompress_data = decompress_onepass;
    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
  }
  /* Allocate the workspace buffer */
  coef->workspace = (JCOEF *)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                SIZEOF(JCOEF) * DCTSIZE2);
 }