Make sure the work space memory is properly aligned

We use the heap allocators to avoid having more than one implementation of the alignment logic. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@19 632fc199-4ca6-4c93-a231-07263d6284db
2009-03-09 13:29:37 +00:00 · 2009-03-09 13:29:37 +00:00 · 35c4719638
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@ -53,12 +53,16 @@ typedef struct {
   */
  DCTELEM * divisors[NUM_QUANT_TBLS];

+  /* work area for FDCT subroutine */
+  DCTELEM * workspace;
+
 #ifdef DCT_FLOAT_SUPPORTED
  /* Same as above for the floating-point case. */
  float_DCT_method_ptr float_dct;
  float_convsamp_method_ptr float_convsamp;
  float_quantize_method_ptr float_quantize;
  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
+  FAST_FLOAT * float_workspace;
 #endif
 } my_fdct_controller;

@ -403,10 +407,11 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
-  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  DCTELEM * workspace;
  JDIMENSION bi;

  /* Make sure the compiler doesn't look up these every pass */
+  workspace = fdct->workspace;
  forward_DCT_method_ptr do_dct = fdct->dct;
  convsamp_method_ptr do_convsamp = fdct->convsamp;
  quantize_method_ptr do_quantize = fdct->quantize;
@ -492,10 +497,12 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
-  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  FAST_FLOAT * workspace;
  JDIMENSION bi;

+
  /* Make sure the compiler doesn't look up these every pass */
+  workspace = fdct->float_workspace;
  float_DCT_method_ptr do_dct = fdct->float_dct;
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
@ -603,6 +610,18 @@ jinit_forward_dct (j_compress_ptr cinfo)
    break;
  }

+  /* Allocate workspace memory */
+#ifdef DCT_FLOAT_SUPPORTED
+  if (cinfo->dct_method == JDCT_FLOAT)
+    fdct->float_workspace = (FAST_FLOAT *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(FAST_FLOAT) * DCTSIZE2);
+  else
+#endif
+    fdct->workspace = (DCTELEM *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(DCTELEM) * DCTSIZE2);
+
  /* Mark divisor tables unallocated */
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
    fdct->divisors[i] = NULL;
--- a/jdcoefct.c
+++ b/jdcoefct.c
@ -47,6 +47,9 @@ typedef struct {
   */
  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];

+  /* Temporary workspace for one MCU */
+  JCOEF * workspace;
+
 #ifdef D_MULTISCAN_FILES_SUPPORTED
  /* In multi-pass modes, we need a virtual block array for each component. */
  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
@ -471,13 +474,16 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
  jpeg_component_info *compptr;
  inverse_DCT_method_ptr inverse_DCT;
  boolean first_row, last_row;
-  JBLOCK workspace;
+  JCOEF * workspace;
  int *coef_bits;
  JQUANT_TBL *quanttbl;
  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
  int Al, pred;

+  /* Keep a local variable to avoid looking it up more than once */
+  workspace = coef->workspace;
+
  /* Force some input to be done if we are getting ahead of the input. */
  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
 	 ! cinfo->inputctl->eoi_reached) {
@ -733,4 +739,9 @@ jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
    coef->pub.decompress_data = decompress_onepass;
    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
  }
+
+  /* Allocate the workspace buffer */
+  coef->workspace = (JCOEF *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                SIZEOF(JCOEF) * DCTSIZE2);
 }