Implement Additional Quantized Utils (#192)

* C_refernce functions(8) are added, Compilation done, No-warnings * Function/loop/Condition coding style has been updated * Remaining functions has been integrated, Conv,Convolution,Maxpool functions are yet to update * Remaining functions are converted to C,(maxpool,convolution,conv) Compilation done, No-warning. * Allignment and coding style updated. * Unit test cases are added, Compilation done,Test done,No-warnings * Review comments updated, added function header with more descriptive, Test cases results compared with c++ code and fixed some issues,Code allignment,Variable names are modified. Compilation done with 0 warnings * Unit test cases for all functions has been added,Compilation done, 0 warnings. * Fixed review comments Signed-off-by: harsh dave <harsh.dave@vvdntech.in> * Added shift operation for a division operation * Resolved review comments. Fixed examples with correct scaling factor, Function description correction * Added test cases for with SHIFT and FLOATEXP macros enabled. Modification in function definations when SHIFT macro is defined. * Correction made in sigmoid function * SHIFT operation is enabled by default * SHIFT operation is enabled by default * Revert Some Changes * Remove Whitespaces * Clean Up Nomenclature and Simplify API * Reorder Functions; Implement v_q_sub_scalar(); Improve Tests and Documentation * Integrate Calls to v_q_treesum() * Remove q_convolution() and q_maxpool(); Enforce Better Type Checks * Incorporate Review Co-authored-by: Arun KV <arun.kv@vvdntech.in> Co-authored-by: harsh dave <harsh.dave@vvdntech.in>
2020-07-17 22:30:43 +05:30 · 2020-07-17 22:30:43 +05:30 · 9f68e66e46
--- a/c_reference/include/quantized_utils.h
+++ b/c_reference/include/quantized_utils.h
@ -53,15 +53,266 @@ void v_q_tanh(const INT_T* const vec, ITER_T len, INT_T* const ret,
 // Function for adding a scalar to every element of a vector.
 void v_q_scalar_add(INT_T scalar, const INT_T* const vec, ITER_T len,
                    INT_T* const ret, SCALE_T scscalar, SCALE_T scvec, SCALE_T scret);
-// Function for subtracting every element of a vector from a scalar.
-// The resultant vector has elements C_{i} = A - B_{i}.
+// Function for subtracting every element of a vector B from a scalar a.
+// The resultant vector has elements C_{i} = a - B_{i}.
 void v_q_scalar_sub(INT_T scalar, const INT_T* const vec, ITER_T len,
                    INT_T* const ret, SCALE_T scscalar, SCALE_T scvec, SCALE_T scret);
+// Function for subtracting a scalar b from every element of a vector A.
+// The resultant vector has elements C_{i} = A_{i} - b.
+void v_q_sub_scalar(const INT_T* const vec, INT_T scalar, ITER_T len,
+                    INT_T* const ret, SCALE_T scvec, SCALE_T scscalar, SCALE_T scret);
 // Function for multiplying a scalar to every element of a vector.
 void v_q_scalar_mul(INT_T scalar, const INT_T* const vec, ITER_T len,
                    INT_T* const ret, SCALE_T scscalar, SCALE_T scvec);
+/**
+ * @brief Finds the index of largest element in a vector.
+ * @param[in]       vec       pointer to input vector
+ * @param[in]       len       length of the vector
+ * @param[out]      ret       pointer variable storing the index of the largest element in the vector
+ * @return          none
+ * @example         vec       = {12, 24, 54, 1, 2, 10}
+ *                  *ret      = 2
+ */
+void v_q_argmax(const INT_T* const vec, ITER_T len, ITER_T* const ret);
+/**
+ * @brief Replaces any negative element present in the vector with zero.
+ * Note: No saturation is done here, and hence, the output might overflow with a large input.
+ * @param[in, out]  vec       pointer to vector on which element-wise ReLU operation is to be applied
+ * @param[in]       len       length of the input vector
+ * @return          none
+ * @example         vec       = {1324, -5453, 3454, -3435, 8789}
+ *                  len       = 5
+ *                  vec       = {1324, 0, 3454, 0, 8789}
+ */
+void v_q_relu(INT_T* const vec, ITER_T len);
+/**
+ * @brief Computes exponentiation of all elements in the vec (interpreted as a floating-point value) to the base e and stores the result in ret.
+ * Note: No saturation is done here, and hence, the output might overflow with a large input.
+ * @param[in]       vec       pointer to vector whose exponential scaling is to be performed
+ * @param[in]       len       length of the vector
+ * @param[in]       scvec     scaling factor for input vector
+ * @param[in]       scret     scaling factor for output vector
+ * @param[out]      ret       pointer to the output vector
+ * @return          none
+ * @example         formula   = exp((float)vec_{i} / scvec) * scret
+ *                  vec       = {13, 54, 34, 35, 87}
+ *                  len       = 5
+ *                  scvec     = 8
+ *                  scret     = 8
+ *                  ret       = {40, 6832, 560, 635, 29493}
+ */
+void v_q_exp(const INT_T* const vec, ITER_T len, INT_T* const ret,
+             SCALE_T scvec, SCALE_T scret);
+/**
+ * @brief Performs element-wise up-scaling on a vector.
+ * @param[in, out]  vec       pointer to the vector on which up-scaling is to be performed
+ * @param[in]       len       length of the vector
+ * @param[in]       scvec     scaling factor of the vector
+ * @return          none
+ * @example         vec       = {423, -987, -2342, 1232}
+ *                  len       = 4
+ *                  scvec     = 10
+ *                  vec       = {4230, -9870, -23420, 12320}
+ */
+void v_q_scale_up(INT_T* const vec, ITER_T len, SCALE_T scvec);
+/**
+ * @brief Performs element-wise down-scaling on a vector.
+ * @param[in, out]  vec       pointer to the vector on which down-scaling is to be performed
+ * @param[in]       len       length of the vector
+ * @param[in]       scvec     scaling factor of the vector
+ * @return          none
+ * @example         vec       = {4232, -9879, -2342, 1232}
+ *                  len       = 4
+ *                  scvec     = 37
+ *                  vec       = {114, -267, -63, 33}
+ */
+void v_q_scale_down(INT_T* const vec, ITER_T len, SCALE_T scvec);
+
+/**
+ * @brief Performs the transpose on the input matrix.
+ * @param[in]       mat       pointer to the input matrix which is to be transposed
+ * @param[in]       nrows     number of rows of output matrix
+ * @param[in]       ncols     number of columns of output matrix
+ * @param[out]      ret       pointer to the output matrix which will hold the transpose
+ * @return          none
+ * @example         mat       = { {1, 2},
+ *                                {4, 5} }
+ *                  ret       = { {1, 4},
+ *                                {2, 5} }
+ *
+ * @example         mat       = { {1, 2, 3},
+ *                                {4, 5, 6} }
+ *                  ret       = { {1,  4},
+ *                                {2,  5},
+ *                                {3,  6} }
+ */
+void m_q_transpose(const INT_T* const mat, ITER_T nrows, ITER_T ncols,
+                   INT_T* const ret);
+/**
+ * @brief Performs the row-order or the column-order reversal of the 2-D input matrix.
+ * @param[in]       mat       pointer to the (row / column-major) input matrix on which reversal is to be performed
+ * @param[in]       nrows     number of rows of the input matrix
+ * @param[in]       ncols     number of columns of the input matrix
+ * @param[in]       axis      axis of reversal; 0 for reversal along rows and 1 for reversal along columns
+ * @param[out]      ret       pointer to the output matrix
+ * @return          none
+ * @example         mat       = { {1, 2},
+ *                                {4, 5} }
+ *                  nrows     = 2
+ *                  ncols     = 2
+ *                  axis      = 0
+ *                  ret       = { {4, 5},
+ *                                {1, 2} }
+ */
+void m_q_reverse(const INT_T* const mat, ITER_T nrows, ITER_T ncols,
+                 ITER_T axis, INT_T* const ret);
+/**
+ * @brief Performs the column-wise addition of a bias term to the input matrix.
+ * dim(mat) = dim(ret) = [nrows][ncols]; dim(vec) = [ncols].
+ * @param[in]       mat       pointer to the input matrix on which addition is to be performed
+ * @param[in]       vec       pointer to the bias vector which is to be added
+ * @param[in]       nrows     number of rows of the input matrix
+ * @param[in]       ncols     number of columns of the input matrix
+ * @param[out]      ret       pointer to the output matrix
+ * @param[in]       scmat     scaling factor for the input matrix
+ * @param[in]       scvec     scaling factor for the bias vector
+ * @param[in]       scret     scaling factor for the output matrix
+ * @return          none
+ * @example         mat       = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934}
+ *                  vec       = {8452, 2341, 9383, 2353}
+ *                  nrows     = 2
+ *                  ncols     = 4
+ *                  ret       = {2775, 3311, 4072, 2305, 6507, 2290, 5051, 5055}
+ *                  scmat     = 1
+ *                  scvec     = 2
+ *                  scret     = 2
+ */
+void m_q_add_vec(const INT_T* const mat, const INT_T* const vec,
+                 ITER_T nrows, ITER_T ncols, INT_T* const ret,
+                 SCALE_T scmat, SCALE_T scvec, SCALE_T scret);
+/**
+ * @brief Performs the column-wise subtraction of a bias term from the input matrix.
+ * dim(mat) = dim(ret) = [nrows][ncols]; dim(vec) = [ncols].
+ * @param[in]       mat       pointer to the input matrix from which subtraction is to be performed
+ * @param[in]       vec       pointer to the bias vector which is to be subtracted
+ * @param[in]       nrows     number of rows of the input matrix
+ * @param[in]       ncols     number of columns of the input matrix
+ * @param[out]      ret       pointer to the output matrix
+ * @param[in]       scmat     scaling factor for the input matrix
+ * @param[in]       scvec     scaling factor for the bias vector
+ * @param[in]       scret     scaling factor for the output matrix
+ * @return          none
+ * @example         mat       = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934}
+ *                  vec       = {8452, 2341, 9383, 2353}
+ *                  nrows     = 2
+ *                  ncols     = 4
+ *                  ret       = {-1451, 2141, -618, 1129, 2281, 1120, 361, 3879}
+ *                  scmat     = 1
+ *                  scvec     = 2
+ *                  scret     = 2
+ */
+void m_q_sub_vec(const INT_T* const mat, const INT_T* const vec,
+                 ITER_T nrows, ITER_T ncols, INT_T* const ret,
+                 SCALE_T scmat, SCALE_T scvec, SCALE_T scret);
 // Function for multiplying a matrix with a vector.
 void m_q_mulvec(const INT_T* const mat, const INT_T* const vec, ITER_T nrows,
                ITER_T ncols, INT_T* const ret, SCALE_T scmat, SCALE_T scvec,
                SCALE_T H1, SCALE_T H2);
+/**
+ * @brief Performs sparse matrix multiplication of a matrix and a vector.
+ * col_indices and mat_values combined are a sparse representation; dim(vec) = [ndims].
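+ * mat_values[j] is the j^th non-zero value of the input matrix, read row by row in the layout shown in the example below.
+ * col_indices lists, for each such row, the 1-based column positions of its non-zero values, followed by a terminating 0.
+ * Number of zeroes before an entry of col_indices : row of the corresponding value; (entry - 1) : its column, which is also the index of ret it is accumulated into.
+ * Note: the products are added onto the existing contents of ret (ret[index - 1] += ...), so the caller must zero-initialize ret to obtain the plain product; ndims is the number of zero-terminated groups that are read.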
+ * @param[in]       col_indices  pointer to input matrix which stores the column indices of non-zero values of matrix A
+ * @param[in]       mat_values   pointer to input matrix which stores the non-zero values of matrix A
+ * @param[in]       vec          pointer to the input vector
+ * @param[in]       ndims        dimension of the multiplication vector
+ * @param[out]      ret          pointer to the output matrix
+ * @param[in]       scmat        scale factor of the input matrix
+ * @param[in]       scvec        scale factor of the input vector
+ * @param[in]       scret        scale factor of the output matrix
+ * @return          none
+ * @example         mat          = {{10, 20, 30, 40, 50, 60, 70, 0, 0, 0, 0, 0, 0, 0},
+ *                                  {0, 80, 0, 90, 0, 100, 0, 110, 0, 120, 0, 130, 0, 140}}
+ *                  col_indices  = {1, 2, 3, 4, 5, 6, 7, 0, 2, 4, 6, 8, 10, 12, 14, 0}
+ *                  mat_values   = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140}
+ *                  vec          = {1, 2}
+ *                  ndims        = 2
+ *                  scmat        = 1
+ *                  scvec        = 2
+ *                  scret        = 4
+ *                  ret          = {1, 22, 3, 27, 6, 32, 8, 27, 0, 30, 0, 32, 0, 35}
+ */
+void m_q_sparse_mulvec(const ITER_T* const col_indices, const INT_T* const mat_values,
+                       const INT_T* const vec, ITER_T ndims, INT_T* const ret,
+                       SCALE_T scmat, SCALE_T scvec, SCALE_T scret);
+
+/**
+ * @brief Performs the channel-wise addition of a bias term to the input tensor.
+ * dim(mat) = dim(ret) = [nbatches][nrows][ncols][nchannels]; dim(vec) = [nchannels].
+ * @param[in]       mat       pointer to the input tensor on which addition is to be performed
+ * @param[in]       vec       pointer to the bias vector which is to be added
+ * @param[in]       nbatches  number of batches of the input tensor
+ * @param[in]       nrows     number of rows of the input tensor
+ * @param[in]       ncols     number of columns of the input tensor
+ * @param[in]       nchannels number of channels of the input tensor
+ * @param[out]      ret       pointer to the output tensor
+ * @param[in]       scmat     scaling factor for the input tensor
+ * @param[in]       scvec     scaling factor for the bias vector
+ * @param[in]       scret     scaling factor for the output tensor
+ * @return          none
+ * @example         mat       = { { {1324, 5453}, {3454, 3435} },
+ *                                { {8789, 3411}, {5412, 8934} } },
+ *                              { { {6895, 1211}, {6790, 5425} },
+ *                                { {8976, 4539}, {9348, 9321} } }
+ *                  vec       = {8452, 2341}
+ *                  nbatches  = 2
+ *                  nrows     = 2
+ *                  ncols     = 2
+ *                  nchannels = 2
+ *                  ret       = { { {2775, 3311}, {3840, 2302} },
+ *                                { {6507, 2290}, {4819, 5052} } },
+ *                              { { {5560, 1190}, {5508, 3297} },
+ *                                { {6601, 2854}, {6787, 5245} } }
+ *                  scmat     = 1
+ *                  scvec     = 2
+ *                  scret     = 2
+ */
+void t_q_add_vec(const INT_T* const mat, const INT_T* const vec,
+                 ITER_T nbatches, ITER_T nrows, ITER_T ncols,
+                 ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
+                 SCALE_T scvec, SCALE_T scret);
+/**
+ * @brief Performs the channel-wise subtraction of a bias term from the input tensor.
+ * dim(mat) = dim(ret) = [nbatches][nrows][ncols][nchannels]; dim(vec) = [nchannels].
+ * @param[in]       mat       pointer to the input tensor from which subtraction is to be performed
+ * @param[in]       vec       pointer to the bias vector which is to be subtracted
+ * @param[in]       nbatches  number of batches of the input tensor
+ * @param[in]       nrows     number of rows of the input tensor
+ * @param[in]       ncols     number of columns of the input tensor
+ * @param[in]       nchannels number of channels of the input tensor
+ * @param[out]      ret       pointer to the output tensor
+ * @param[in]       scmat     scaling factor for the input tensor
+ * @param[in]       scvec     scaling factor for the bias vector
+ * @param[in]       scret     scaling factor for the output tensor
+ * @return          none
+ * @example         mat       = { { {1324, 5453}, {3454, 3435} },
+ *                                { {8789, 3411}, {5412, 8934} } },
+ *                              { { {6895, 1211}, {6790, 5425} },
+ *                                { {8976, 4539}, {9348, 9321} } }
+ *                  vec       = {8452, 2341}
+ *                  nbatches  = 2
+ *                  nrows     = 2
+ *                  ncols     = 2
+ *                  nchannels = 2
+ *                  ret       = { { {-1451, 2141}, {-386, 1132} },
+ *                                { {2281, 1120}, {593, 3882} } },
+ *                              { { {1334, 20}, {1282, 2127} },
+ *                                { {2375, 1684}, {2561, 4075} } }
+ *                  scmat     = 1
+ *                  scvec     = 2
+ *                  scret     = 2
+ */
+void t_q_sub_vec(const INT_T* const ten, const INT_T* const vec,
+                 ITER_T nbatches, ITER_T nrows, ITER_T ncols,
+                 ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
+                 SCALE_T scvec, SCALE_T scret);
+
 #endif
--- a/c_reference/src/quantized_mbconv.c
+++ b/c_reference/src/quantized_mbconv.c
@ -100,7 +100,7 @@ void q_mbconv_block(const INT_T* const input, const INT_T* const filter1,
          ITER_T counter = 0;
          for (S_ITER_T hf = -(HF >> 1); hf <= (HF >> 1); hf++) {
            for (S_ITER_T wf = -(WF >> 1); wf <= (WF >> 1); wf++) {
-              if (((h + hf) < 0) || ((h + hf) >= H) || ((w + wf) < 0) || ((w + wf) >= W)) {
+              if (((h + hf) < 0) || ((h + hf) >= (S_ITER_T)H) || ((w + wf) < 0) || ((w + wf) >= (S_ITER_T)W)) {
                treesumBuffer[counter] = 0;
              } else {
                treesumBuffer[counter] = ((INTM_T)convBuffer1[(((ITER_T)(h + hf)) % HF) * W * CTemp + ((ITER_T)(w + wf)) * CTemp + g]) *
--- a/c_reference/src/quantized_utils.c
+++ b/c_reference/src/quantized_utils.c
@ -128,6 +128,17 @@ void v_q_scalar_sub(INT_T scalar, const INT_T* const vec, ITER_T len,
  }
 }

+void v_q_sub_scalar(const INT_T* const vec, INT_T scalar, ITER_T len,
+                    INT_T* const ret, SCALE_T scvec, SCALE_T scscalar, SCALE_T scret) { // Writes ret[i] = (down-scaled vec[i]) - (down-scaled scalar) for every i in [0, len).
+  for (ITER_T i = 0; i < len; i++) {
+    #ifdef SHIFT
+      ret[i] = ((vec[i] >> (scvec + scret)) - (scalar >> (scscalar + scret))); // SHIFT build: the scales are shift amounts, so input and output scaling are merged into a single right shift per operand.
+    #else
+      ret[i] = ((vec[i] / scvec) / scret) - ((scalar / scscalar) / scret); // Default build: the scales are divisors, applied one after the other (operand scale first, then the output scale).
+    #endif
+  }
+}
+
 void v_q_scalar_mul(INT_T scalar, const INT_T* const vec, ITER_T len,
                    INT_T* const ret, SCALE_T scscalar, SCALE_T scvec) {
  for (ITER_T i = 0; i < len; i++) {
@ -139,22 +150,208 @@ void v_q_scalar_mul(INT_T scalar, const INT_T* const vec, ITER_T len,
  }
 }

+void v_q_argmax(const INT_T* const vec, ITER_T len, ITER_T* const ret) { // Stores in *ret the index of the largest element among the first len elements of vec.
+  INT_T max_value = vec[0]; // vec[0] is read unconditionally, so vec must hold at least one element.
+  ITER_T max_index = 0;
+
+  for (ITER_T i = 1; i < len; i++) {
+    if (max_value < vec[i]) { // Strict comparison: when values tie, the earliest index is kept.
+      max_index = i;
+      max_value = vec[i];
+    }
+  }
+
+  *ret = max_index;
+}
+
+void v_q_relu(INT_T* const vec, ITER_T len) { // In-place ReLU over the first len elements of vec.
+  for (ITER_T i = 0; i < len; i++) {
+    if (vec[i] < 0) {
+      vec[i] = 0; // Negative entries become zero; all other entries are left untouched.
+    }
+  }
+}
+
+void v_q_exp(const INT_T* const vec, ITER_T len, INT_T* const ret,
+             SCALE_T scvec, SCALE_T scret) { // Writes ret[i] = exp(vec[i] / scvec) * scret for every i in [0, len); scvec and scret are used as a divisor and a multiplier here, with no SHIFT variant.
+  for (ITER_T i = 0; i < len; i++) {
+    ret[i] = ((INT_T)(exp(((float)vec[i]) / scvec) * scret)); // The division is done in float; the final cast to INT_T is not range-checked, so large inputs do not saturate.
+  }
+}
+
+void v_q_scale_up(INT_T* const vec, ITER_T len, SCALE_T scvec) { // Scales the first len elements of vec up in place.
+  for (ITER_T i = 0; i < len; i++) {
+    #ifdef SHIFT
+      vec[i] <<= scvec; // SHIFT build: scvec is a left-shift amount.
+    #else
+      vec[i] *= scvec; // Default build: scvec is a multiplier.
+    #endif
+  }
+}
+
+void v_q_scale_down(INT_T* const vec, ITER_T len, SCALE_T scvec) { // Scales the first len elements of vec down in place.
+  for (ITER_T i = 0; i < len; i++) {
+    #ifdef SHIFT
+      vec[i] >>= scvec; // SHIFT build: scvec is a right-shift amount.
+    #else
+      vec[i] /= scvec; // Default build: scvec is an integer divisor.
+    #endif
+  }
+}
+
+void m_q_transpose(const INT_T* const mat, ITER_T nrows, ITER_T ncols,
+                   INT_T* const ret) { // Fills ret sequentially while reading mat with a stride of nrows elements.
+  ITER_T len = nrows * ncols, counter = 0; // counter is the read position in mat; i (below) is the write position in ret.
+  for (ITER_T i = 0; i < len; i++) {
+    if (counter >= len) { // The read position ran past the end of mat...
+      counter -= len - 1; // ...so wrap around and start one element further along than the previous pass did.
+    }
+
+    ret[i] = mat[counter];
+    counter += nrows;
+  }
+}
+
+void m_q_reverse(const INT_T* const mat, ITER_T nrows, ITER_T ncols, ITER_T axis,
+                 INT_T* const ret) { // Copies mat into ret with either the order of the ncols-element rows reversed (axis == 0) or the order inside each such row reversed (any other axis value).
+  ITER_T len = nrows * ncols;
+
+  if (axis == 0) {
+    ITER_T col_counter = 0, row_index = len - ncols; // row_index is the offset of the source row; it starts at the last row of mat.
+
+    for (ITER_T i = 0; i < len; i++) {
+      if (col_counter >= ncols) { // A full row has been copied: restart the column counter...
+        col_counter = 0;
+        row_index -= ncols; // ...and move the source one row towards the start of mat.
+      }
+
+      ret[i] = mat[row_index + col_counter];
+      col_counter++;
+    }
+  } else {
+    S_ITER_T row_counter = ncols - 1; // Position inside the current row, counted down from its last element; signed so that it can drop below zero.
+    ITER_T col_index = 0; // Offset of the first element of the current row.
+
+    for (ITER_T i = 0; i < len; i++) {
+      if (row_counter < 0) { // The current row is exhausted: continue from the end of the next row.
+        row_counter = ncols - 1;
+        col_index += ncols;
+      }
+
+      ret[i] = mat[col_index + (ITER_T)row_counter];
+      row_counter--;
+    }
+  }
+}
+
+void m_q_add_vec(const INT_T* const mat, const INT_T* const vec,
+                 ITER_T nrows, ITER_T ncols, INT_T* const ret,
+                 SCALE_T scmat, SCALE_T scvec, SCALE_T scret) { // Writes ret[i] = (down-scaled mat[i]) + (down-scaled vec[i mod ncols]) over all nrows * ncols elements.
+  ITER_T len = nrows * ncols;
+  for (ITER_T i = 0, w = 0; i < len; i++, w++) {
+    if (w >= ncols) { // w cycles through 0 .. ncols - 1 so that vec is re-applied to every ncols-element row.
+      w = 0;
+    }
+
+    #ifdef SHIFT
+      ret[i] = ((mat[i] >> (scmat + scret)) + (vec[w] >> (scvec + scret))); // SHIFT build: the scales are shift amounts, merged into one right shift per operand.
+    #else
+      ret[i] = ((mat[i] / scmat) / scret) + ((vec[w] / scvec) / scret); // Default build: the scales are divisors, applied operand scale first, then output scale.
+    #endif
+  }
+}
+
+void m_q_sub_vec(const INT_T* const mat, const INT_T* const vec,
+                 ITER_T nrows, ITER_T ncols, INT_T* const ret,
+                 SCALE_T scmat, SCALE_T scvec, SCALE_T scret) { // Writes ret[i] = (down-scaled mat[i]) - (down-scaled vec[i mod ncols]) over all nrows * ncols elements.
+  ITER_T len = nrows * ncols;
+  for (ITER_T i = 0, w = 0; i < len; i++, w++) {
+    if (w >= ncols) { // w cycles through 0 .. ncols - 1 so that vec is re-applied to every ncols-element row.
+      w = 0;
+    }
+
+    #ifdef SHIFT
+      ret[i] = ((mat[i] >> (scmat + scret)) - (vec[w] >> (scvec + scret))); // SHIFT build: the scales are shift amounts, merged into one right shift per operand.
+    #else
+      ret[i] = ((mat[i] / scmat) / scret) - ((vec[w] / scvec) / scret); // Default build: the scales are divisors, applied operand scale first, then output scale.
+    #endif
+  }
+}
+
 void m_q_mulvec(const INT_T* const mat, const INT_T* const vec, ITER_T nrows,
                 ITER_T ncols, INT_T* const ret, SCALE_T scmat, SCALE_T scvec,
                 SCALE_T H1, SCALE_T H2) {
-  INTM_T tmp[ncols];
+  INTM_T treesumBuffer[ncols]; // Scratch buffer for the ncols element-wise products of one row; it is refilled for every row.
  for (ITER_T row = 0; row < nrows; row++) {
    INT_T* mat_offset = (INT_T*)mat + row * ncols;

    for (ITER_T col = 0; col < ncols; col++) {
-      tmp[col] = ((INTM_T)(*mat_offset++) * (INTM_T)vec[col]);
+      treesumBuffer[col] = ((INTM_T)(*mat_offset++) * (INTM_T)vec[col]); // Both factors are widened to INTM_T before multiplying.
    }

-    v_q_treesum(&tmp[0], ncols, H1, H2);
+    v_q_treesum(&treesumBuffer[0], ncols, H1, H2); // The reduced value is read back from element 0 of the buffer below.
    #ifdef SHIFT
-      ret[row] = (tmp[0] >> (scmat + scvec));
+      ret[row] = (treesumBuffer[0] >> (scmat + scvec)); // SHIFT build: the scales are shift amounts.
    #else
-      ret[row] = ((tmp[0] / scmat) / scvec);
+      ret[row] = ((treesumBuffer[0] / scmat) / scvec); // Default build: the scales are divisors.
+    #endif
+  }
+}
+
+void m_q_sparse_mulvec(const ITER_T* const col_indices, const INT_T* const mat_values,
+                       const INT_T* const vec, ITER_T ndims, INT_T* const ret,
+                       SCALE_T scmat, SCALE_T scvec, SCALE_T scret) { // Accumulates into ret the scaled products of the sparse values with vec; ret is only added to, never cleared, by this function.
+  ITER_T iter_index = 0, iter_value = 0; // Separate cursors: iter_index walks col_indices (including terminators), iter_value walks mat_values.
+  for (ITER_T k = 0; k < ndims; k++) {
+    ITER_T index = col_indices[iter_index];
+
+    while (index != 0) { // A zero entry terminates the group of indices that pairs with vec[k].
+      #ifdef SHIFT
+        ret[index - 1] += (((INTM_T)mat_values[iter_value] * (INTM_T)vec[k]) >> (scmat + scvec + scret)); // Indices are 1-based, hence index - 1; SHIFT build sums the three shift amounts.
+      #else
+        ret[index - 1] += (((INTM_T)mat_values[iter_value] * (INTM_T)vec[k]) / ((INTM_T)scmat * (INTM_T)scvec * (INTM_T)scret)); // Indices are 1-based, hence index - 1; default build divides by the product of the three scales.
+      #endif
+      iter_index++;
+      iter_value++;
+      index = col_indices[iter_index];
+    }
+
+    iter_index++; // Step over the zero terminator before starting the next group.
+  }
+}
+
+void t_q_add_vec(const INT_T* const mat, const INT_T* const vec,
+                 ITER_T nbatches, ITER_T nrows, ITER_T ncols,
+                 ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
+                 SCALE_T scvec, SCALE_T scret) { // Writes ret[i] = (down-scaled mat[i]) + (down-scaled vec[i mod nchannels]) over the whole flattened tensor.
+  ITER_T len = nbatches * nrows * ncols * nchannels; // The four dimensions are only used through their product.
+  for (ITER_T i = 0, c = 0; i < len; i++, c++) {
+    if (c >= nchannels) { // c cycles through 0 .. nchannels - 1, i.e. the channel is the fastest-varying index.
+      c = 0;
+    }
+
+    #ifdef SHIFT
+      ret[i] = ((mat[i] >> (scmat + scret)) + (vec[c] >> (scvec + scret))); // SHIFT build: the scales are shift amounts, merged into one right shift per operand.
+    #else
+      ret[i] = ((mat[i] / scmat) / scret) + ((vec[c] / scvec) / scret); // Default build: the scales are divisors, applied operand scale first, then output scale.
+    #endif
+  }
+}
+
+void t_q_sub_vec(const INT_T* const mat, const INT_T* const vec,
+                 ITER_T nbatches, ITER_T nrows, ITER_T ncols,
+                 ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
+                 SCALE_T scvec, SCALE_T scret) { // Writes ret[i] = (down-scaled mat[i]) - (down-scaled vec[i mod nchannels]) over the whole flattened tensor.
+  ITER_T len = nbatches * nrows * ncols * nchannels; // The four dimensions are only used through their product.
+  for (ITER_T i = 0, c = 0; i < len; i++, c++) {
+    if (c >= nchannels) { // c cycles through 0 .. nchannels - 1, i.e. the channel is the fastest-varying index.
+      c = 0;
+    }
+
+    #ifdef SHIFT
+      ret[i] = ((mat[i] >> (scmat + scret)) - (vec[c] >> (scvec + scret))); // SHIFT build: the scales are shift amounts, merged into one right shift per operand.
+    #else
+      ret[i] = ((mat[i] / scmat) / scret) - ((vec[c] / scvec) / scret); // Default build: the scales are divisors, applied operand scale first, then output scale.
    #endif
  }
 }
--- a/c_reference/tests/utils/test_quantized_utils.c
+++ b/c_reference/tests/utils/test_quantized_utils.c
@ -7,11 +7,12 @@
 // All values generated from Seedot on Wider Regression dataset.
 // By default, all tests run without using bit-shifting operations.
 // Function for matching the predicted and expected quantized outputs.
-int check_output(const INT_T* const pred, const INT_T* const expected,
-                 unsigned len) {
+static int check_output(const INT_T* const pred, const INT_T* const expected,
+                        unsigned len) {
  for (unsigned i = 0; i < len; i++)
  {
    if (pred[i] != expected[i]) {
+      printf("Output: %d, Expected: %d at Index: %d\n", pred[i], expected[i], i);
      return 1;
    }
  }
@ -21,20 +22,35 @@ int check_output(const INT_T* const pred, const INT_T* const expected,
 // Test v_q_treesum() function.
 int test_v_q_treesum() {
  INTM_T qvec_A[128] = {3038976, 0, 0, 1514478, 0, 0, 778261, 32670, -2619599, 0, 3849336, 5310900, 0, 0, 0, 0, 0, 0, 0, 0, -142898, 1510353, 0, -6888482, 0, -760720, 1296384, -6749490, -9687275, -686501, -743600, -2112105, 0, 8962408, 0, -17460547, -1477630, 0, 0, -2195694, -860184, -214912, 0, -1389548, 0, 0, 2081898, 0, 0, 23544, -3351768, 0, 0, 0, 3886614, 0, -5839384, 0, 842100, 4051917, 0, 0, 1459796, 2006850, 517867, 3044471, 0, 2578300, 0, 0, -1921101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4709000, 0, 0, 0, 0, -65664, 0, 0, 4313415, -1126680, 0, 0, -8524140, 0, 6248088, 0, 0, 0, 0, 2273558, 0, 0, -738913, 0, 0, -13912800, 0, 0, 0, -5329773, 5346088, 0, 113245, 0, 0, 8590397, 0, 2224368, 1020225, 489580, 0, -185584, -86475, 0, -2013258, -2417536, 0};
-  const INTM_T expected[1] = {-135837};
+
+  #ifdef SHIFT
+    INTM_T expected = {-135840}; // Expected value when the library is built with SHIFT; it differs slightly from the default build's value below.
+  #else
+    INTM_T expected = {-135837}; // Expected value for the default (non-SHIFT) build.
+  #endif

  v_q_treesum(&qvec_A[0], 128, 7, 0);
-  return check_output((const INT_T*)qvec_A, (const INT_T*)expected, 1);
+  if (qvec_A[0] != expected) { // The result is compared directly as INTM_T, read from element 0 of the input buffer.
+      printf("TreeSum Output: %d, Expected: %d\n", qvec_A[0], expected); // NOTE(review): %d assumes INTM_T is passed as int - confirm against the INTM_T typedef.
+      return 1;
+  }
+  return 0; // 0 signals a passing test, 1 (above) a failure.
 }

 // Test v_q_add() function.
 int test_v_q_add() {
  const INT_T qvec_A[8] = {-425, -169, -3534, 524, -2739, 87, 52, 292};
  const INT_T qvec_B[8] = {-18777, -9518, 4055, -7309, 8584, -17257, -5280, -7933};
-  const INT_T expected[8] = {-2772, -1358, -3028, -389, -1666, -2070, -608, -699};
  INT_T pred[8];

-  v_q_add(&qvec_A[0], &qvec_B[0], 8, &pred[0], 1, 8, 1);
+  #ifdef SHIFT
+    const INT_T expected[8] = {-2773, -1359, -3028, -390, -1666, -2071, -608, -700}; // SHIFT build: several entries differ by one from the default build's expected values.
+    v_q_add(&qvec_A[0], &qvec_B[0], 8, &pred[0], 0, 3, 0); // Shift amounts 0, 3, 0 are the base-2 logarithms of the divisors 1, 8, 1 used in the default build.
+  #else
+    const INT_T expected[8] = {-2772, -1358, -3028, -389, -1666, -2070, -608, -699}; // Default build: same values the test expected before SHIFT support was added.
+    v_q_add(&qvec_A[0], &qvec_B[0], 8, &pred[0], 1, 8, 1);
+  #endif
+
  return check_output(pred, expected, 8);
 }

@ -42,10 +58,16 @@ int test_v_q_add() {
 int test_v_q_sub() {
  const INT_T qvec_A[8] = {-425, -169, -3534, 524, -2739, 87, 52, 292};
  const INT_T qvec_B[8] = {-18777, -9518, 4055, -7309, 8584, -17257, -5280, -7933};
-  const INT_T expected[8] = {1922, 1020, -4040, 1437, -3812, 2244, 712, 1283};
  INT_T pred[8];

-  v_q_sub(&qvec_A[0], &qvec_B[0], 8, &pred[0], 1, 8, 1);
+  #ifdef SHIFT
+    const INT_T expected[8] = {1923, 1021, -4040, 1438, -3812, 2245, 712, 1284}; // SHIFT build: several entries differ by one from the default build's expected values.
+    v_q_sub(&qvec_A[0], &qvec_B[0], 8, &pred[0], 0, 3, 0); // Shift amounts 0, 3, 0 are the base-2 logarithms of the divisors 1, 8, 1 used in the default build.
+  #else
+    const INT_T expected[8] = {1922, 1020, -4040, 1437, -3812, 2244, 712, 1283}; // Default build: same values the test expected before SHIFT support was added.
+    v_q_sub(&qvec_A[0], &qvec_B[0], 8, &pred[0], 1, 8, 1);
+  #endif
+
  return check_output(pred, expected, 8);
 }

@ -53,10 +75,16 @@ int test_v_q_sub() {
 int test_v_q_hadamard() {
  const INT_T qvec_A[8] = {16378, 13638, 16378, 9787, 14861, 16378, 10661, 11018};
  const INT_T qvec_B[8] = {178, 1064, -2048, 1718, -1663, 851, 1244, 1282};
-  const INT_T expected[8] = {1423, 7085, -16378, 8209, -12067, 6805, 6475, 6897};
  INT_T pred[8];

-  v_q_hadamard(&qvec_A[0], &qvec_B[0], 8, &pred[0], 32, 64);
+  #ifdef SHIFT
+    const INT_T expected[8] = {1423, 7085, -16378, 8209, -12068, 6805, 6475, 6897}; // SHIFT build: only the fifth entry differs from the default build (-12068 instead of -12067).
+    v_q_hadamard(&qvec_A[0], &qvec_B[0], 8, &pred[0], 5, 6); // Shift amounts 5 and 6 are the base-2 logarithms of the divisors 32 and 64 used in the default build.
+  #else
+    const INT_T expected[8] = {1423, 7085, -16378, 8209, -12067, 6805, 6475, 6897}; // Default build: same values the test expected before SHIFT support was added.
+    v_q_hadamard(&qvec_A[0], &qvec_B[0], 8, &pred[0], 32, 64);
+  #endif
+
  return check_output(pred, expected, 8);
 }

@ -87,7 +115,12 @@ int test_v_q_scalar_add() {
  const INT_T expected[8] = {16378, 13638, 16378, 9787, 14861, 16378, 10661, 11018};
  INT_T pred[8];

-  v_q_scalar_add(qscalar_A, &qvec_B[0], 8, &pred[0], 256, 1, 1);
+  #ifdef SHIFT
+    v_q_scalar_add(qscalar_A, &qvec_B[0], 8, &pred[0], 8, 0, 0);
+  #else
+    v_q_scalar_add(qscalar_A, &qvec_B[0], 8, &pred[0], 256, 1, 1);
+  #endif
+
  return check_output(pred, expected, 8);
 }

@ -98,7 +131,28 @@ int test_v_q_scalar_sub() {
  const INT_T expected[8] = {16384, 13624, 16384, 9744, 14856, 16384, 10624, 10984};
  INT_T pred[8];

-  v_q_scalar_sub(qscalar_A, &qvec_B[0], 8, &pred[0], 1, 1, 1);
+  #ifdef SHIFT
+    v_q_scalar_sub(qscalar_A, &qvec_B[0], 8, &pred[0], 0, 0, 0);
+  #else
+    v_q_scalar_sub(qscalar_A, &qvec_B[0], 8, &pred[0], 1, 1, 1);
+  #endif
+
+  return check_output(pred, expected, 8);
+}
+
+// Test v_q_sub_scalar() function.
+int test_v_q_sub_scalar() {
+  const INT_T qvec_A[8] = {0, 2760, 0, 6640, 1528, 0, 5760, 5400};
+  const INT_T qscalar_B = 16384;
+  const INT_T expected[8] = {-16384, -13624, -16384, -9744, -14856, -16384, -10624, -10984}; // Each entry equals qvec_A[i] - qscalar_B; the same array is expected in both build modes.
+  INT_T pred[8];
+
+  #ifdef SHIFT
+    v_q_sub_scalar(&qvec_A[0], qscalar_B, 8, &pred[0], 0, 0, 0); // Shift amounts of 0 leave the operands unscaled.
+  #else
+    v_q_sub_scalar(&qvec_A[0], qscalar_B, 8, &pred[0], 1, 1, 1); // Divisors of 1 leave the operands unscaled.
+  #endif
+
+  return check_output(pred, expected, 8);
+}

@ -109,21 +163,245 @@ int test_v_q_scalar_mul() {
  const INT_T expected[8] = {16261, 13521, 16261, 9670, 14744, 16261, 10544, 10901};
  INT_T pred[8];

-  v_q_scalar_mul(qscalar_A, &qvec_B[0], 8, &pred[0], 128, 256);
+  #ifdef SHIFT
+    v_q_scalar_mul(qscalar_A, &qvec_B[0], 8, &pred[0], 7, 8);
+  #else
+    v_q_scalar_mul(qscalar_A, &qvec_B[0], 8, &pred[0], 128, 256);
+  #endif
+
  return check_output(pred, expected, 8);
 }

+// Test v_q_argmax() function: the largest entry of qvec_A (28765) sits at index 5.
+int test_v_q_argmax() {
+  const INT_T qvec_A[8] = {1675, 9870, -9876, -1234, 5674, 28765, 9876, 12654};
+  const ITER_T expected[1] = {5};
+  ITER_T pred[1];
+
+  v_q_argmax(&qvec_A[0], 8, &pred[0]);
+  return pred[0] != expected[0];  // Compare as ITER_T (no cast to INT_T*); main() treats nonzero as failure.
+}
+
+// Checks v_q_relu(): the buffer is rewritten in place; negatives become 0, other entries are unchanged.
+int test_v_q_relu() {
+  INT_T values[16] = {-3648, 648, -2147, -2348, 1468, -4348, 3648, 3648, -648, 9648, 3778, 4743, 7483, -243, 8, -21};
+  const INT_T clamped[16] = {0, 648, 0, 0, 1468, 0, 3648, 3648, 0, 9648, 3778, 4743, 7483, 0, 8, 0};
+
+  v_q_relu(values, 16);
+  return check_output(values, clamped, 16);
+}
+
+// Checks v_q_exp() on 16 inputs against hard-coded reference outputs.
+int test_v_q_exp() {
+  const INT_T input[16] = {13, 54, 34, 35, 87, 11, 41, 93, 89, 11, 90, 25, 76, 39, 48, 93};
+  const INT_T reference[16] = {40, 6832, 560, 635, 29493, 31, 1345, -22628, 18482, 31, 25215, 182, -24195, 1047, 3227, -22628};
+  INT_T output[16];
+
+  v_q_exp(input, 16, output, 8, 8);
+  return check_output(output, reference, 16);
+}
+
+// Checks v_q_scale_up(): the buffer is updated in place and every reference value is twice its input.
+int test_v_q_scale_up() {
+  INT_T values[16] = {423, -987, -2342, 1232, -324, 843, 982, 2342, 343, 654, 987, 654, 567, 2876, 987, 1265};
+  INT_T doubled[16] = {846, -1974, -4684, 2464, -648, 1686, 1964, 4684, 686, 1308, 1974, 1308, 1134, 5752, 1974, 2530};
+
+  #ifdef SHIFT
+    v_q_scale_up(values, 16, 1);  // SHIFT build passes 1 (note 1 << 1 == 2).
+  #else
+    v_q_scale_up(values, 16, 2);  // Default build passes the factor 2 itself.
+  #endif
+
+  return check_output(values, doubled, 16);
+}
+
+// Checks v_q_scale_down(): in-place halving; the two builds disagree only on -9879 (-4940 vs -4939).
+int test_v_q_scale_down() {
+  INT_T values[16] = {4232, -9879, -2342, 1232, -3242, 8432, 9823, 2342, 343, 6543, 9876, 6542, 5674, 28765, 9876, 12654};
+
+  #ifdef SHIFT
+    const INT_T halved[16] = {2116, -4940, -1171, 616, -1621, 4216, 4911, 1171, 171, 3271, 4938, 3271, 2837, 14382, 4938, 6327};
+    v_q_scale_down(values, 16, 1);
+  #else
+    const INT_T halved[16] = {2116, -4939, -1171, 616, -1621, 4216, 4911, 1171, 171, 3271, 4938, 3271, 2837, 14382, 4938, 6327};
+    v_q_scale_down(values, 16, 2);
+  #endif
+
+  return check_output(values, halved, 16);
+}
+
+// Checks m_q_transpose() on a rectangular (12-element) and a square (9-element) row-major matrix.
+int test_m_q_transpose() {
+  const INT_T rect_in[4 * 3] = {1238, 5432, 1834, 6543, -5698, -2342, 9876, 5674, 8435, 6542, 7824, 3924};
+  const INT_T rect_ref[3 * 4] = {1238, 6543, 9876, 6542, 5432, -5698, 5674, 7824, 1834, -2342, 8435, 3924};
+  INT_T rect_out[12];
+
+  const INT_T square_in[3 * 3] = {1238, 5432, 1834, 6543, -5698, -2342, 9876, 5674, 8435};
+  const INT_T square_ref[3 * 3] = {1238, 6543, 9876, 5432, -5698, 5674, 1834, -2342, 8435};
+  INT_T square_out[9];
+
+  m_q_transpose(rect_in, 3, 4, rect_out);  // NOTE(review): 3, 4 look like the output shape - confirm.
+  m_q_transpose(square_in, 3, 3, square_out);
+
+  return check_output(rect_out, rect_ref, 12) || check_output(square_out, square_ref, 9);
+}
+
+// Checks m_q_reverse() on a 4x4 matrix: axis 1 flips each row, axis 0 flips the order of the rows.
+int test_m_q_reverse() {
+  const INT_T input[4 * 4] = {4232, -9879, -2342, 1232, -3242, 8432, 9823, 2342, 343, 6543, 9876, 6542, 5674, 28765, 9876, 12654};
+  const INT_T ref_axis1[4 * 4] = {1232, -2342, -9879, 4232, 2342, 9823, 8432, -3242, 6542, 9876, 6543, 343, 12654, 9876, 28765, 5674};
+  const INT_T ref_axis0[4 * 4] = {5674, 28765, 9876, 12654, 343, 6543, 9876, 6542, -3242, 8432, 9823, 2342, 4232, -9879, -2342, 1232};
+  INT_T out_axis1[16];
+  INT_T out_axis0[16];
+
+  m_q_reverse(input, 4, 4, 1, out_axis1);
+  m_q_reverse(input, 4, 4, 0, out_axis0);
+
+  return check_output(out_axis1, ref_axis1, 16) || check_output(out_axis0, ref_axis0, 16);
+}
+
+// Checks m_q_add_vec(): a 4-entry vector is added to a 4x4 matrix (reference values pair vec[j] with column j).
+int test_m_q_add_vec() {
+  const INT_T matrix[4 * 4] = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934, 6895, 1211, 6790, 5425, 8976, 4539, 9348, 9321};
+  const INT_T vector[4] = {8452, 2341, 9383, 2353};
+  const INT_T reference[16] = {2775, 3311, 4072, 2305, 6507, 2290, 5051, 5055, 5560, 1190, 5740, 3300, 6601, 2854, 7019, 5248};
+  INT_T output[16];
+
+  #ifdef SHIFT
+    m_q_add_vec(matrix, vector, 4, 4, output, 0, 1, 1);  // SHIFT build: 0, 1, 1 (1 << 0 == 1, 1 << 1 == 2).
+  #else
+    m_q_add_vec(matrix, vector, 4, 4, output, 1, 2, 2);  // Default build: 1, 2, 2.
+  #endif
+
+  return check_output(output, reference, 16);
+}
+
+// Checks m_q_sub_vec(): a 4-entry vector is subtracted from a 4x4 matrix (reference values pair vec[j] with column j).
+int test_m_q_sub_vec() {
+  const INT_T matrix[4 * 4] = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934, 6895, 1211, 6790, 5425, 8976, 4539, 9348, 9321};
+  const INT_T vector[4] = {8452, 2341, 9383, 2353};
+  const INT_T reference[16] = {-1451, 2141, -618, 1129, 2281, 1120, 361, 3879, 1334, 20, 1050, 2124, 2375, 1684, 2329, 4072};
+  INT_T output[16];
+
+  #ifdef SHIFT
+    m_q_sub_vec(matrix, vector, 4, 4, output, 0, 1, 1);  // SHIFT build: 0, 1, 1 (1 << 0 == 1, 1 << 1 == 2).
+  #else
+    m_q_sub_vec(matrix, vector, 4, 4, output, 1, 2, 2);  // Default build: 1, 2, 2.
+  #endif
+
+  return check_output(output, reference, 16);
+}
+
 // Test m_q_mulvec() function.
 int test_m_q_mulvec() {
  const INT_T qmat_A[8 * 4] = {7069, -10389, 1562, -1992, 3262, -37, -1143, -995, 5513, -17035, -14615, -6636, 4733, -403, 4106, -1104, -2707, -1287, -18128, -1832, -10108, -137, 2064, 1207, 5233, 226, 831, -1909, 4489, -1099, 2845, -1261};
  const INT_T qvec_B[4] = {1040, 1919, 4254, 4024};
-  const INT_T expected[8] = {-425, -169, -3534, 524, -2739, 87, 52, 292};
  INT_T pred[8];

-  m_q_mulvec(&qmat_A[0], &qvec_B[0], 8, 4, &pred[0], 128, 64, 2, 0);
+  #ifdef SHIFT
+    const INT_T expected[8] = {-426, -170, -3535, 524, -2740, 87, 52, 292};  // Negative entries are one lower than in the #else branch.
+    m_q_mulvec(&qmat_A[0], &qvec_B[0], 8, 4, &pred[0], 7, 6, 2, 0);  // 7 and 6: presumably log2 of the #else values (2^7 = 128, 2^6 = 64) - TODO confirm.
+  #else
+    const INT_T expected[8] = {-425, -169, -3534, 524, -2739, 87, 52, 292};  // Same values the test used before the SHIFT variant was added.
+    m_q_mulvec(&qmat_A[0], &qvec_B[0], 8, 4, &pred[0], 128, 64, 2, 0);  // Same call the test used before the SHIFT variant was added.
+  #endif
+
  return check_output(pred, expected, 8);
 }

+// Test m_q_sparse_mulvec() function against hard-coded reference outputs (14 result entries).
+int test_m_q_sparse_mulvec() {
+  const ITER_T qmat_A[16] = {1, 2, 3, 4, 5, 6, 7, 0, 2, 4, 6, 8, 10, 12, 14, 0};  // NOTE(review): looks like two 0-terminated index lists - confirm.
+  const INT_T qmat_B[14] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140};
+  const INT_T qvec_C[2] = {1, 2};
+  const INT_T expected[14] = {1, 22, 3, 27, 6, 32, 8, 27, 0, 30, 0, 32, 0, 35};
+  INT_T pred[14] = {0};  // Zero-filled; "{0}" instead of "{}" because empty braces are only valid ISO C from C23.
+
+  #ifdef SHIFT
+    m_q_sparse_mulvec(&qmat_A[0], &qmat_B[0], &qvec_C[0], 2, &pred[0], 0, 1, 2);  // SHIFT build: 0, 1, 2 (1 << 0, 1 << 1, 1 << 2 are 1, 2, 4).
+  #else
+    m_q_sparse_mulvec(&qmat_A[0], &qmat_B[0], &qvec_C[0], 2, &pred[0], 1, 2, 4);  // Default build: 1, 2, 4.
+  #endif
+
+  return check_output(pred, expected, 14);
+}
+
+// Checks t_q_add_vec(): a 2-entry vector is added to a 2x2x2x2 tensor (reference values pair vec[k] with last index k).
+int test_t_q_add_vec() {
+  const INT_T input[2 * 2 * 2 * 2] = {1324, 5453,
+                                      3454, 3435,
+
+                                      8789, 3411,
+                                      5412, 8934,
+
+
+                                      6895, 1211,
+                                      6790, 5425,
+
+                                      8976, 4539,
+                                      9348, 9321};
+  const INT_T vector[2] = {8452, 2341};
+  const INT_T reference[2 * 2 * 2 * 2] = {2775, 3311,
+                                          3840, 2302,
+
+                                          6507, 2290,
+                                          4819, 5052,
+
+
+                                          5560, 1190,
+                                          5508, 3297,
+
+                                          6601, 2854,
+                                          6787, 5245};
+  INT_T output[16];
+
+  #ifdef SHIFT
+    t_q_add_vec(input, vector, 2, 2, 2, 2, output, 0, 1, 1);  // SHIFT build: 0, 1, 1 (1 << 0 == 1, 1 << 1 == 2).
+  #else
+    t_q_add_vec(input, vector, 2, 2, 2, 2, output, 1, 2, 2);  // Default build: 1, 2, 2.
+  #endif
+
+  return check_output(output, reference, 16);
+}
+
+// Checks t_q_sub_vec(): a 2-entry vector is subtracted from a 2x2x2x2 tensor (reference values pair vec[k] with last index k).
+int test_t_q_sub_vec() {
+  const INT_T input[2 * 2 * 2 * 2] = {1324, 5453,
+                                      3454, 3435,
+
+                                      8789, 3411,
+                                      5412, 8934,
+
+
+                                      6895, 1211,
+                                      6790, 5425,
+
+                                      8976, 4539,
+                                      9348, 9321};
+  const INT_T vector[2] = {8452, 2341};
+  const INT_T reference[2 * 2 * 2 * 2] = {-1451, 2141,
+                                          -386, 1132,
+
+                                          2281, 1120,
+                                          593, 3882,
+
+
+                                          1334, 20,
+                                          1282, 2127,
+
+                                          2375, 1684,
+                                          2561, 4075};
+  INT_T output[16];
+
+  #ifdef SHIFT
+    t_q_sub_vec(input, vector, 2, 2, 2, 2, output, 0, 1, 1);  // SHIFT build: 0, 1, 1 (1 << 0 == 1, 1 << 1 == 2).
+  #else
+    t_q_sub_vec(input, vector, 2, 2, 2, 2, output, 1, 2, 2);  // Default build: 1, 2, 2.
+  #endif
+
+  return check_output(output, reference, 16);
+}
+
 int main() {
  if (test_v_q_treesum()) {
    printf("Test Failure for v_q_treesum()!\n");
@ -141,10 +419,36 @@ int main() {
    printf("Test Failure for v_q_scalar_add()!\n");
  } else if (test_v_q_scalar_sub()) {
    printf("Test Failure for v_q_scalar_sub()!\n");
+  } else if (test_v_q_sub_scalar()) {
+    printf("Test Failure for v_q_sub_scalar()!\n");
  } else if (test_v_q_scalar_mul()) {
    printf("Test Failure for v_q_scalar_mul()!\n");
+  } else if (test_v_q_argmax()) {
+    printf("Test Failure for v_q_argmax()!\n");
+  } else if (test_v_q_relu()) {
+    printf("Test Failure for v_q_relu()!\n");
+  } else if (test_v_q_exp()) {
+    printf("Test Failure for v_q_exp()!\n");
+  } else if (test_v_q_scale_up()) {
+    printf("Test Failure for v_q_scale_up()!\n");
+  } else if (test_v_q_scale_down()) {
+    printf("Test Failure for v_q_scale_down()!\n");
+  } else if (test_m_q_transpose()) {
+    printf("Test Failure for m_q_transpose()!\n");
+  } else if (test_m_q_reverse()) {
+    printf("Test Failure for m_q_reverse()!\n");
+  } else if (test_m_q_add_vec()) {
+    printf("Test Failure for m_q_add_vec()!\n");
+  } else if (test_m_q_sub_vec()) {
+    printf("Test Failure for m_q_sub_vec()!\n");
  } else if (test_m_q_mulvec()) {
    printf("Test Failure for m_q_mulvec()!\n");
+  } else if (test_m_q_sparse_mulvec()) {
+    printf("Test Failure for m_q_sparse_mulvec()!\n");
+  } else if (test_t_q_add_vec()) {
+    printf("Test Failure for t_q_add_vec()!\n");
+  } else if (test_t_q_sub_vec()) {
+    printf("Test Failure for t_q_sub_vec()!\n");
  } else {
    printf("All Tests Passed!\n");
    return 0;