Implement Additional Quantized Utils (#192)

* C reference functions (8) are added, compilation done, no warnings

* Function/loop/Condition coding style has been updated

* Remaining functions has been integrated, Conv,Convolution,Maxpool functions are yet to update

* Remaining functions are converted to C,(maxpool,convolution,conv) Compilation done, No-warning.

* Alignment and coding style updated.

* Unit test cases are added, Compilation done,Test done,No-warnings

* Review comments updated, added function header with more descriptive, Test cases results compared with c++ code and fixed some issues,Code allignment,Variable names are modified. Compilation done with 0 warnings

* Unit test cases for all functions has been added,Compilation done, 0 warnings.

* Fixed review comments

Signed-off-by: harsh dave <harsh.dave@vvdntech.in>

* Added shift operation for a division operation

* Resolved review comments. Fixed examples with correct scaling factor, Function description correction

* Added test cases with SHIFT and FLOATEXP macros enabled. Modified function definitions when the SHIFT macro is defined.

* Correction made in sigmoid function

* SHIFT operation is enabled by default

* SHIFT operation is enabled by default

* Revert Some Changes

* Remove Whitespaces

* Clean Up Nomenclature and Simplify API

* Reorder Functions; Implement v_q_sub_scalar(); Improve Tests and Documentation

* Integrate Calls to v_q_treesum()

* Remove q_convolution() and q_maxpool(); Enforce Better Type Checks

* Incorporate Review

Co-authored-by: Arun KV <arun.kv@vvdntech.in>
Co-authored-by: harsh dave <harsh.dave@vvdntech.in>
This commit is contained in:
Shikhar Jaiswal 2020-07-17 22:30:43 +05:30 коммит произвёл GitHub
Родитель 061881b854
Коммит 9f68e66e46
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 775 добавлений и 23 удалений

Просмотреть файл

@ -53,15 +53,266 @@ void v_q_tanh(const INT_T* const vec, ITER_T len, INT_T* const ret,
// Function for adding a scalar to every element of a vector.
void v_q_scalar_add(INT_T scalar, const INT_T* const vec, ITER_T len,
INT_T* const ret, SCALE_T scscalar, SCALE_T scvec, SCALE_T scret);
// Function for subtracting every element of a vector B from a scalar a.
// The resultant vector has elements C_{i} = a - B_{i}.
void v_q_scalar_sub(INT_T scalar, const INT_T* const vec, ITER_T len,
INT_T* const ret, SCALE_T scscalar, SCALE_T scvec, SCALE_T scret);
// Function for subtracting a scalar b from every element of a vector A.
// The resultant vector has elements C_{i} = A_{i} - b.
void v_q_sub_scalar(const INT_T* const vec, INT_T scalar, ITER_T len,
INT_T* const ret, SCALE_T scvec, SCALE_T scscalar, SCALE_T scret);
// Function for multiplying a scalar to every element of a vector.
void v_q_scalar_mul(INT_T scalar, const INT_T* const vec, ITER_T len,
INT_T* const ret, SCALE_T scscalar, SCALE_T scvec);
/**
* @brief Finds the index of largest element in a vector.
* @param[in] vec pointer to input vector
* @param[in] len length of the vector
* @param[out] ret pointer variable storing the index of the largest element in the vector
* @return none
* @example vec = {12, 24, 54, 1, 2, 10}
* *ret = 2
*/
void v_q_argmax(const INT_T* const vec, ITER_T len, ITER_T* const ret);
/**
* @brief Replace any negative element present in the vector with zero.
* Note: No saturation is done here, and hence, the output might overflow with a large input.
* @param[in, out] vec pointer to vector on which element-wise ReLU operation is to be applied
* @param[in] len length of the input vector
* @return none
* @example vec = {1324, -5453, 3454, -3435, 8789}
* len = 5
* vec = {1324, 0, 3454, 0, 8789}
*/
void v_q_relu(INT_T* const vec, ITER_T len);
/**
* @brief Computes exponentiation of all elements in the vec (interpreted as a floating-point value) to the base e and stores the result in ret.
* Note: No saturation is done here, and hence, the output might overflow with a large input.
* @param[in] vec pointer to vector whose exponential scaling is to be performed
* @param[in] len length of the vector
* @param[in] scvec scaling factor for input vector
* @param[in] scret scaling factor for output vector
* @param[out] ret pointer to the output vector
* @return none
* @example formula = exp((float)vec_{i} / scvec) * scret
* vec = {13, 54, 34, 35, 87}
* len = 5
* scvec = 8
* scret = 8
* ret = {40, 6832, 560, 635, 29493}
*/
void v_q_exp(const INT_T* const vec, ITER_T len, INT_T* const ret,
SCALE_T scvec, SCALE_T scret);
/**
* @brief Performs element-wise up-scaling on a vector.
* @param[in, out] vec pointer to the vector on which up-scaling is to be performed
* @param[in] len length of the vector
* @param[in] scvec scaling factor of the vector
* @return none
* @example vec = {423, -987, -2342, 1232}
* len = 4
* scvec = 10
* vec = {4230, -9870, -23420, 12320}
*/
void v_q_scale_up(INT_T* const vec, ITER_T len, SCALE_T scvec);
/**
* @brief Performs element-wise down-scaling on a vector.
* @param[in, out] vec pointer to the vector on which down-scaling is to be performed
* @param[in] len length of the vector
* @param[in] scvec scaling factor of the vector
* @return none
* @example vec = {4232, -9879, -2342, 1232}
* len = 4
* scvec = 37
* vec = {114, -267, -63, 33}
*/
void v_q_scale_down(INT_T* const vec, ITER_T len, SCALE_T scvec);
/**
* @brief Performs the transpose on the input matrix.
* @param[in] mat pointer to the input matrix which is to be transposed
* @param[in] nrows number of rows of output matrix
* @param[in] ncols number of columns of output matrix
* @param[out] ret pointer to the output matrix which will hold the transpose
* @return none
* @example mat = { {1, 2},
* {4, 5} }
* ret = { {1, 4},
* {2, 5} }
*
* @example mat = { {1, 2, 3},
* {4, 5, 6} }
* ret = { {1, 4},
* {2, 5},
* {3, 6} }
*/
void m_q_transpose(const INT_T* const mat, ITER_T nrows, ITER_T ncols,
INT_T* const ret);
/**
* @brief Performs the row-order or the column-order reversal of the 2-D input matrix.
* @param[in] mat pointer to the (row / column-major) input matrix on which reversal is to be performed
* @param[in] nrows number of rows of the input matrix
* @param[in] ncols number of columns of the input matrix
* @param[in] axis axis of reversal; 0 for reversal along rows and 1 for reversal along columns
* @param[out] ret pointer to the output matrix
* @return none
* @example mat = { {1, 2},
* {4, 5} }
* nrows = 2
* ncols = 2
* axis = 0
* ret = { {4, 5},
* {1, 2} }
*/
void m_q_reverse(const INT_T* const mat, ITER_T nrows, ITER_T ncols,
ITER_T axis, INT_T* const ret);
/**
* @brief Performs the column-wise addition of a bias term to the input matrix.
* dim(mat) = dim(ret) = [nrows][ncols]; dim(vec) = [ncols].
* @param[in] mat pointer to the input matrix on which addition is to be performed
* @param[in] vec pointer to the bias vector which is to be added
* @param[in] nrows number of rows of the input matrix
* @param[in] ncols number of columns of the input matrix
* @param[out] ret pointer to the output matrix
* @param[in] scmat scaling factor for the input matrix
* @param[in] scvec scaling factor for the bias vector
* @param[in] scret scaling factor for the output matrix
* @return none
* @example mat = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934}
* vec = {8452, 2341, 9383, 2353}
* nrows = 2
* ncols = 4
* ret = {2775, 3311, 4072, 2305, 6507, 2290, 5051, 5055}
* scmat = 1
* scvec = 2
* scret = 2
*/
void m_q_add_vec(const INT_T* const mat, const INT_T* const vec,
ITER_T nrows, ITER_T ncols, INT_T* const ret,
SCALE_T scmat, SCALE_T scvec, SCALE_T scret);
/**
* @brief Performs the column-wise subtraction of a bias term from the input matrix.
* dim(mat) = dim(ret) = [nrows][ncols]; dim(vec) = [ncols].
* @param[in] mat pointer to the input matrix from which subtraction is to be performed
* @param[in] vec pointer to the bias vector which is to be subtracted
* @param[in] nrows number of rows of the input matrix
* @param[in] ncols number of columns of the input matrix
* @param[out] ret pointer to the output matrix
* @param[in] scmat scaling factor for the input matrix
* @param[in] scvec scaling factor for the bias vector
* @param[in] scret scaling factor for the output matrix
* @return none
* @example mat = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934}
* vec = {8452, 2341, 9383, 2353}
* nrows = 2
* ncols = 4
* ret = {-1451, 2141, -618, 1129, 2281, 1120, 361, 3879}
* scmat = 1
* scvec = 2
* scret = 2
*/
void m_q_sub_vec(const INT_T* const mat, const INT_T* const vec,
ITER_T nrows, ITER_T ncols, INT_T* const ret,
SCALE_T scmat, SCALE_T scvec, SCALE_T scret);
// Function for multiplying a matrix with a vector.
void m_q_mulvec(const INT_T* const mat, const INT_T* const vec, ITER_T nrows,
ITER_T ncols, INT_T* const ret, SCALE_T scmat, SCALE_T scvec,
SCALE_T H1, SCALE_T H2);
/**
* @brief Performs sparse matrix multiplication of a matrix and a vector.
* col_indices and mat_values combined are a sparse representation; dim(vec) = [ndims].
* mat_values[i] is the i^th non-zero value of the input matrix, and col_indices[i] encodes the location of mat_values[i].
* Number of zeroes before col_indices[i] : row of mat_values[i]
* col_indices[i - l] where l is the number of zeroes before col_indices[i]: column of mat_values[i]
* @param[in] col_indices pointer to input matrix which stores the column indices of non-zero values of matrix A
* @param[in] mat_values pointer to input matrix which stores the non-zero values of matrix A
* @param[in] vec pointer to the input vector
* @param[in] ndims dimension of the multiplication vector
* @param[out] ret pointer to the output matrix
* @param[in] scmat scale factor of the input matrix
* @param[in] scvec scale factor of the input vector
* @param[in] scret scale factor of the output matrix
* @return none
* @example mat = {{10, 20, 30, 40, 50, 60, 70, 0, 0, 0, 0, 0, 0, 0},
* {0, 80, 0, 90, 0, 100, 0, 110, 0, 120, 0, 130, 0, 140}}
* col_indices = {1, 2, 3, 4, 5, 6, 7, 0, 2, 4, 6, 8, 10, 12, 14, 0}
* mat_values = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140}
* vec = {1, 2}
* ndims = 2
* scmat = 1
* scvec = 2
* scret = 4
* ret = {1, 22, 3, 27, 6, 32, 8, 27, 0, 30, 0, 32, 0, 35}
*/
void m_q_sparse_mulvec(const ITER_T* const col_indices, const INT_T* const mat_values,
const INT_T* const vec, ITER_T ndims, INT_T* const ret,
SCALE_T scmat, SCALE_T scvec, SCALE_T scret);
/**
* @brief Performs the channel-wise addition of a bias term to the input tensor.
* dim(mat) = dim(ret) = [nbatches][nrows][ncols][nchannels]; dim(vec) = [nchannels].
* @param[in] mat pointer to the input tensor on which addition is to be performed
* @param[in] vec pointer to the bias vector which is to be added
* @param[in] nbatches number of batches of the input tensor
* @param[in] nrows number of rows of the input tensor
* @param[in] ncols number of columns of the input tensor
* @param[in] nchannels number of channels of the input tensor
* @param[out] ret pointer to the output tensor
* @param[in] scmat scaling factor for the input tensor
* @param[in] scvec scaling factor for the bias vector
* @param[in] scret scaling factor for the output tensor
* @return none
* @example mat = { { {1324, 5453}, {3454, 3435} },
* { {8789, 3411}, {5412, 8934} } },
* { { {6895, 1211}, {6790, 5425} },
* { {8976, 4539}, {9348, 9321} } }
* vec = {8452, 2341}
* nbatches = 2
* nrows = 2
* ncols = 2
* nchannels = 2
* ret = { { {2775, 3311}, {3840, 2302} },
* { {6507, 2290}, {4819, 5052} } },
* { { {5560, 1190}, {5508, 3297} },
* { {6601, 2854}, {6787, 5245} } }
* scmat = 1
* scvec = 2
* scret = 2
*/
void t_q_add_vec(const INT_T* const mat, const INT_T* const vec,
ITER_T nbatches, ITER_T nrows, ITER_T ncols,
ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
SCALE_T scvec, SCALE_T scret);
/**
 * @brief Performs the channel-wise subtraction of a bias term from the input tensor.
 * dim(mat) = dim(ret) = [nbatches][nrows][ncols][nchannels]; dim(vec) = [nchannels].
 * @param[in]       mat          pointer to the input tensor from which subtraction is to be performed
 * @param[in]       vec          pointer to the bias vector which is to be subtracted
 * @param[in]       nbatches     number of batches of the input tensor
 * @param[in]       nrows        number of rows of the input tensor
 * @param[in]       ncols        number of columns of the input tensor
 * @param[in]       nchannels    number of channels of the input tensor
 * @param[out]      ret          pointer to the output tensor
 * @param[in]       scmat        scaling factor for the input tensor
 * @param[in]       scvec        scaling factor for the bias vector
 * @param[in]       scret        scaling factor for the output tensor
 * @return          none
 * @example         mat          = { { { {1324, 5453}, {3454, 3435} },
 *                                     { {8789, 3411}, {5412, 8934} } },
 *                                   { { {6895, 1211}, {6790, 5425} },
 *                                     { {8976, 4539}, {9348, 9321} } } }
 *                  vec          = {8452, 2341}
 *                  nbatches     = 2
 *                  nrows        = 2
 *                  ncols        = 2
 *                  nchannels    = 2
 *                  ret          = { { { {-1451, 2141}, {-386, 1132} },
 *                                     { {2281, 1120}, {593, 3882} } },
 *                                   { { {1334, 20}, {1282, 2127} },
 *                                     { {2375, 1684}, {2561, 4075} } } }
 *                  scmat        = 1
 *                  scvec        = 2
 *                  scret        = 2
 */
void t_q_sub_vec(const INT_T* const mat, const INT_T* const vec,
                 ITER_T nbatches, ITER_T nrows, ITER_T ncols,
                 ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
                 SCALE_T scvec, SCALE_T scret);
#endif

Просмотреть файл

@ -100,7 +100,7 @@ void q_mbconv_block(const INT_T* const input, const INT_T* const filter1,
ITER_T counter = 0;
for (S_ITER_T hf = -(HF >> 1); hf <= (HF >> 1); hf++) {
for (S_ITER_T wf = -(WF >> 1); wf <= (WF >> 1); wf++) {
if (((h + hf) < 0) || ((h + hf) >= H) || ((w + wf) < 0) || ((w + wf) >= W)) {
if (((h + hf) < 0) || ((h + hf) >= (S_ITER_T)H) || ((w + wf) < 0) || ((w + wf) >= (S_ITER_T)W)) {
treesumBuffer[counter] = 0;
} else {
treesumBuffer[counter] = ((INTM_T)convBuffer1[(((ITER_T)(h + hf)) % HF) * W * CTemp + ((ITER_T)(w + wf)) * CTemp + g]) *

Просмотреть файл

@ -128,6 +128,17 @@ void v_q_scalar_sub(INT_T scalar, const INT_T* const vec, ITER_T len,
}
}
/**
 * Subtracts a scalar from every element of a vector: ret[i] = vec[i] - scalar.
 * Both operands are scaled down before the subtraction: vec[i] by scvec and
 * scret, the scalar by scscalar and scret. With SHIFT defined the scales are
 * right-shift amounts; otherwise they are divisors.
 */
void v_q_sub_scalar(const INT_T* const vec, INT_T scalar, ITER_T len,
                    INT_T* const ret, SCALE_T scvec, SCALE_T scscalar, SCALE_T scret) {
#ifdef SHIFT
  ITER_T pos = 0;
  while (pos < len) {
    ret[pos] = ((vec[pos] >> (scvec + scret)) - (scalar >> (scscalar + scret)));
    pos++;
  }
#else
  ITER_T pos = 0;
  while (pos < len) {
    ret[pos] = ((vec[pos] / scvec) / scret) - ((scalar / scscalar) / scret);
    pos++;
  }
#endif
}
void v_q_scalar_mul(INT_T scalar, const INT_T* const vec, ITER_T len,
INT_T* const ret, SCALE_T scscalar, SCALE_T scvec) {
for (ITER_T i = 0; i < len; i++) {
@ -139,22 +150,208 @@ void v_q_scalar_mul(INT_T scalar, const INT_T* const vec, ITER_T len,
}
}
/**
 * Writes to *ret the index of the largest element of vec.
 * Only a strictly greater element replaces the current best, so the first
 * occurrence wins on ties. vec[0] is read unconditionally.
 */
void v_q_argmax(const INT_T* const vec, ITER_T len, ITER_T* const ret) {
  ITER_T best_pos = 0;
  INT_T best_val = vec[0];

  for (ITER_T pos = 1; pos < len; pos++) {
    if (vec[pos] > best_val) {
      best_val = vec[pos];
      best_pos = pos;
    }
  }

  *ret = best_pos;
}
/**
 * In-place ReLU: every negative element of vec is overwritten with zero;
 * non-negative elements are left untouched.
 */
void v_q_relu(INT_T* const vec, ITER_T len) {
  ITER_T pos = 0;
  while (pos < len) {
    if (!(vec[pos] < 0)) {
      pos++;
      continue;
    }
    vec[pos] = 0;
    pos++;
  }
}
/**
 * Element-wise exponential: ret[i] = (INT_T)(exp((float)vec[i] / scvec) * scret).
 * The result is converted to INT_T without any range check.
 */
void v_q_exp(const INT_T* const vec, ITER_T len, INT_T* const ret,
             SCALE_T scvec, SCALE_T scret) {
  ITER_T pos = 0;
  while (pos < len) {
    ret[pos] = ((INT_T)(exp(((float)vec[pos]) / scvec) * scret));
    pos++;
  }
}
/**
 * In-place up-scaling of every element of vec.
 * With SHIFT defined, scvec is a power-of-two exponent and each element is
 * multiplied by 2^scvec; otherwise each element is multiplied by scvec.
 */
void v_q_scale_up(INT_T* const vec, ITER_T len, SCALE_T scvec) {
  for (ITER_T i = 0; i < len; i++) {
    #ifdef SHIFT
      // Left-shifting a negative value is undefined behaviour in C, so the
      // power of two is applied as a multiplication instead. The result is
      // the same for every input where the shift was well defined.
      vec[i] = (INT_T)(vec[i] * ((INT_T)1 << scvec));
    #else
      vec[i] *= scvec;
    #endif
  }
}
/**
 * In-place down-scaling of every element of vec.
 * With SHIFT defined each element is right-shifted by scvec; otherwise each
 * element is divided by scvec.
 */
void v_q_scale_down(INT_T* const vec, ITER_T len, SCALE_T scvec) {
#ifdef SHIFT
  ITER_T pos = 0;
  while (pos < len) {
    vec[pos] >>= scvec;
    pos++;
  }
#else
  ITER_T pos = 0;
  while (pos < len) {
    vec[pos] /= scvec;
    pos++;
  }
#endif
}
/**
 * Writes the transpose of mat into ret.
 * ret is filled sequentially while the read position in mat advances by
 * nrows per element; when it runs past the end it wraps back to the start of
 * the next strided pass (subtracting len - 1).
 */
void m_q_transpose(const INT_T* const mat, ITER_T nrows, ITER_T ncols,
                   INT_T* const ret) {
  const ITER_T total = nrows * ncols;
  ITER_T src = 0;
  ITER_T dst = 0;

  while (dst < total) {
    ret[dst] = mat[src];
    dst++;

    // Step to the next source element, wrapping to the following pass.
    src += nrows;
    if (src >= total) {
      src -= total - 1;
    }
  }
}
/**
 * Copies mat (nrows x ncols, stored row after row) into ret with one axis
 * reversed.
 * axis == 0: the rows are emitted last-to-first, each row unchanged.
 * any other axis: the row order is kept and each row is emitted right-to-left.
 */
void m_q_reverse(const INT_T* const mat, ITER_T nrows, ITER_T ncols, ITER_T axis,
                 INT_T* const ret) {
  const ITER_T total = nrows * ncols;

  if (axis == 0) {
    ITER_T src_row = total - ncols;  // start of the last input row
    ITER_T col = 0;

    for (ITER_T dst = 0; dst < total; dst++) {
      ret[dst] = mat[src_row + col];
      if (++col >= ncols) {
        // Row finished: move one row up in the input.
        col = 0;
        src_row -= ncols;
      }
    }
    return;
  }

  S_ITER_T col = ncols - 1;  // walks each row from its last column down to 0
  ITER_T row_start = 0;
  ITER_T dst = 0;

  while (dst < total) {
    if (col < 0) {
      // Row finished: restart at the last column of the next row.
      col = ncols - 1;
      row_start += ncols;
    }
    ret[dst] = mat[row_start + (ITER_T)col];
    col--;
    dst++;
  }
}
/**
 * Adds a bias vector to every row of a matrix:
 * ret[i] = scaled(mat[i]) + scaled(vec[i mod ncols]) for i in [0, nrows * ncols).
 * mat is scaled down by scmat and scret, vec by scvec and scret; the scales
 * are right-shift amounts when SHIFT is defined and divisors otherwise.
 */
void m_q_add_vec(const INT_T* const mat, const INT_T* const vec,
                 ITER_T nrows, ITER_T ncols, INT_T* const ret,
                 SCALE_T scmat, SCALE_T scvec, SCALE_T scret) {
  const ITER_T total = nrows * ncols;
  ITER_T col = 0;

  for (ITER_T pos = 0; pos < total; pos++) {
    #ifdef SHIFT
      ret[pos] = ((mat[pos] >> (scmat + scret)) + (vec[col] >> (scvec + scret)));
    #else
      ret[pos] = ((mat[pos] / scmat) / scret) + ((vec[col] / scvec) / scret);
    #endif

    // Advance through the bias vector, restarting at each new row.
    col++;
    if (col >= ncols) {
      col = 0;
    }
  }
}
/**
 * Subtracts a bias vector from every row of a matrix:
 * ret[i] = scaled(mat[i]) - scaled(vec[i mod ncols]) for i in [0, nrows * ncols).
 * mat is scaled down by scmat and scret, vec by scvec and scret; the scales
 * are right-shift amounts when SHIFT is defined and divisors otherwise.
 */
void m_q_sub_vec(const INT_T* const mat, const INT_T* const vec,
                 ITER_T nrows, ITER_T ncols, INT_T* const ret,
                 SCALE_T scmat, SCALE_T scvec, SCALE_T scret) {
  const ITER_T total = nrows * ncols;
  ITER_T col = 0;

  for (ITER_T pos = 0; pos < total; pos++) {
    #ifdef SHIFT
      ret[pos] = ((mat[pos] >> (scmat + scret)) - (vec[col] >> (scvec + scret)));
    #else
      ret[pos] = ((mat[pos] / scmat) / scret) - ((vec[col] / scvec) / scret);
    #endif

    // Advance through the bias vector, restarting at each new row.
    col++;
    if (col >= ncols) {
      col = 0;
    }
  }
}
/**
 * Multiplies a matrix by a vector.
 * mat is read as nrows consecutive rows of ncols elements. For each row the
 * products mat[row][col] * vec[col] are computed in INTM_T, reduced with
 * v_q_treesum(buffer, ncols, H1, H2), and element 0 of the buffer is then
 * scaled down by scmat and scvec (right-shift amounts when SHIFT is defined,
 * divisors otherwise) and stored in ret[row].
 */
void m_q_mulvec(const INT_T* const mat, const INT_T* const vec, ITER_T nrows,
                ITER_T ncols, INT_T* const ret, SCALE_T scmat, SCALE_T scvec,
                SCALE_T H1, SCALE_T H2) {
  // Scratch space for one row of products; reused for every row.
  INTM_T treesumBuffer[ncols];

  for (ITER_T row = 0; row < nrows; row++) {
    const INT_T* mat_offset = mat + row * ncols;

    // Exactly one matrix element is consumed per column.
    for (ITER_T col = 0; col < ncols; col++) {
      treesumBuffer[col] = ((INTM_T)(*mat_offset++) * (INTM_T)vec[col]);
    }

    v_q_treesum(&treesumBuffer[0], ncols, H1, H2);
    #ifdef SHIFT
      ret[row] = (treesumBuffer[0] >> (scmat + scvec));
    #else
      ret[row] = ((treesumBuffer[0] / scmat) / scvec);
    #endif
  }
}
/**
 * Sparse matrix-vector product that accumulates into ret.
 * col_indices holds ndims zero-terminated runs of 1-based output positions;
 * mat_values holds the matching non-zero values in the same order. For run k,
 * each entry adds the scaled product mat_value * vec[k] to ret[index - 1].
 * ret is only ever incremented here, never cleared.
 */
void m_q_sparse_mulvec(const ITER_T* const col_indices, const INT_T* const mat_values,
                       const INT_T* const vec, ITER_T ndims, INT_T* const ret,
                       SCALE_T scmat, SCALE_T scvec, SCALE_T scret) {
  const ITER_T* idx_cursor = col_indices;
  const INT_T* val_cursor = mat_values;

  for (ITER_T k = 0; k < ndims; k++) {
    ITER_T target;

    while ((target = *idx_cursor) != 0) {
      #ifdef SHIFT
        ret[target - 1] += (((INTM_T)*val_cursor * (INTM_T)vec[k]) >> (scmat + scvec + scret));
      #else
        ret[target - 1] += (((INTM_T)*val_cursor * (INTM_T)vec[k]) / ((INTM_T)scmat * (INTM_T)scvec * (INTM_T)scret));
      #endif
      idx_cursor++;
      val_cursor++;
    }

    // Step over the zero that terminates this run.
    idx_cursor++;
  }
}
/**
 * Adds a per-channel bias to a tensor:
 * ret[i] = scaled(mat[i]) + scaled(vec[i mod nchannels])
 * for i in [0, nbatches * nrows * ncols * nchannels).
 * mat is scaled down by scmat and scret, vec by scvec and scret; the scales
 * are right-shift amounts when SHIFT is defined and divisors otherwise.
 */
void t_q_add_vec(const INT_T* const mat, const INT_T* const vec,
                 ITER_T nbatches, ITER_T nrows, ITER_T ncols,
                 ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
                 SCALE_T scvec, SCALE_T scret) {
  const ITER_T total = nbatches * nrows * ncols * nchannels;
  ITER_T channel = 0;

  for (ITER_T pos = 0; pos < total; pos++) {
    #ifdef SHIFT
      ret[pos] = ((mat[pos] >> (scmat + scret)) + (vec[channel] >> (scvec + scret)));
    #else
      ret[pos] = ((mat[pos] / scmat) / scret) + ((vec[channel] / scvec) / scret);
    #endif

    // Cycle through the bias vector once per group of nchannels elements.
    channel++;
    if (channel >= nchannels) {
      channel = 0;
    }
  }
}
/**
 * Subtracts a per-channel bias from a tensor:
 * ret[i] = scaled(mat[i]) - scaled(vec[i mod nchannels])
 * for i in [0, nbatches * nrows * ncols * nchannels).
 * mat is scaled down by scmat and scret, vec by scvec and scret; the scales
 * are right-shift amounts when SHIFT is defined and divisors otherwise.
 */
void t_q_sub_vec(const INT_T* const mat, const INT_T* const vec,
                 ITER_T nbatches, ITER_T nrows, ITER_T ncols,
                 ITER_T nchannels, INT_T* const ret, SCALE_T scmat,
                 SCALE_T scvec, SCALE_T scret) {
  const ITER_T total = nbatches * nrows * ncols * nchannels;
  ITER_T channel = 0;

  for (ITER_T pos = 0; pos < total; pos++) {
    #ifdef SHIFT
      ret[pos] = ((mat[pos] >> (scmat + scret)) - (vec[channel] >> (scvec + scret)));
    #else
      ret[pos] = ((mat[pos] / scmat) / scret) - ((vec[channel] / scvec) / scret);
    #endif

    // Cycle through the bias vector once per group of nchannels elements.
    channel++;
    if (channel >= nchannels) {
      channel = 0;
    }
  }
}

Просмотреть файл

@ -7,11 +7,12 @@
// All values generated from Seedot on Wider Regression dataset.
// By default, all tests run without using bit-shifting operations.
// Function for matching the predicted and expected quantized outputs.
int check_output(const INT_T* const pred, const INT_T* const expected,
unsigned len) {
static int check_output(const INT_T* const pred, const INT_T* const expected,
unsigned len) {
for (unsigned i = 0; i < len; i++)
{
if (pred[i] != expected[i]) {
printf("Output: %d, Expected: %d at Index: %d\n", pred[i], expected[i], i);
return 1;
}
}
@ -21,20 +22,35 @@ int check_output(const INT_T* const pred, const INT_T* const expected,
// Test v_q_treesum() function.
// Reduces a 128-element INTM_T buffer in place and compares element 0 with
// the expected sum. Returns 0 on success and 1 on mismatch.
int test_v_q_treesum() {
  INTM_T qvec_A[128] = {3038976, 0, 0, 1514478, 0, 0, 778261, 32670, -2619599, 0, 3849336, 5310900, 0, 0, 0, 0, 0, 0, 0, 0, -142898, 1510353, 0, -6888482, 0, -760720, 1296384, -6749490, -9687275, -686501, -743600, -2112105, 0, 8962408, 0, -17460547, -1477630, 0, 0, -2195694, -860184, -214912, 0, -1389548, 0, 0, 2081898, 0, 0, 23544, -3351768, 0, 0, 0, 3886614, 0, -5839384, 0, 842100, 4051917, 0, 0, 1459796, 2006850, 517867, 3044471, 0, 2578300, 0, 0, -1921101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4709000, 0, 0, 0, 0, -65664, 0, 0, 4313415, -1126680, 0, 0, -8524140, 0, 6248088, 0, 0, 0, 0, 2273558, 0, 0, -738913, 0, 0, -13912800, 0, 0, 0, -5329773, 5346088, 0, 113245, 0, 0, 8590397, 0, 2224368, 1020225, 489580, 0, -185584, -86475, 0, -2013258, -2417536, 0};

  // The SHIFT build expects a slightly different sum than the division build.
  #ifdef SHIFT
    const INTM_T expected = -135840;
  #else
    const INTM_T expected = -135837;
  #endif

  v_q_treesum(&qvec_A[0], 128, 7, 0);

  // INTM_T is compared directly: check_output() takes INT_T buffers.
  if (qvec_A[0] != expected) {
    // Cast explicitly so the format specifier matches whatever INTM_T is.
    printf("TreeSum Output: %ld, Expected: %ld\n", (long)qvec_A[0], (long)expected);
    return 1;
  }
  return 0;
}
// Test v_q_add() function.
// Each build variant has its own expected output and scale arguments
// (shift amounts with SHIFT, divisors otherwise).
int test_v_q_add() {
  const INT_T qvec_A[8] = {-425, -169, -3534, 524, -2739, 87, 52, 292};
  const INT_T qvec_B[8] = {-18777, -9518, 4055, -7309, 8584, -17257, -5280, -7933};
  INT_T pred[8];

  #ifdef SHIFT
    const INT_T expected[8] = {-2773, -1359, -3028, -390, -1666, -2071, -608, -700};
    v_q_add(&qvec_A[0], &qvec_B[0], 8, &pred[0], 0, 3, 0);
  #else
    const INT_T expected[8] = {-2772, -1358, -3028, -389, -1666, -2070, -608, -699};
    v_q_add(&qvec_A[0], &qvec_B[0], 8, &pred[0], 1, 8, 1);
  #endif

  return check_output(pred, expected, 8);
}
@ -42,10 +58,16 @@ int test_v_q_add() {
// Test v_q_sub() function.
// Each build variant has its own expected output and scale arguments
// (shift amounts with SHIFT, divisors otherwise).
int test_v_q_sub() {
  const INT_T qvec_A[8] = {-425, -169, -3534, 524, -2739, 87, 52, 292};
  const INT_T qvec_B[8] = {-18777, -9518, 4055, -7309, 8584, -17257, -5280, -7933};
  INT_T pred[8];

  #ifdef SHIFT
    const INT_T expected[8] = {1923, 1021, -4040, 1438, -3812, 2245, 712, 1284};
    v_q_sub(&qvec_A[0], &qvec_B[0], 8, &pred[0], 0, 3, 0);
  #else
    const INT_T expected[8] = {1922, 1020, -4040, 1437, -3812, 2244, 712, 1283};
    v_q_sub(&qvec_A[0], &qvec_B[0], 8, &pred[0], 1, 8, 1);
  #endif

  return check_output(pred, expected, 8);
}
@ -53,10 +75,16 @@ int test_v_q_sub() {
// Test v_q_hadamard() function.
// Each build variant has its own expected output and scale arguments
// (shift amounts with SHIFT, divisors otherwise).
int test_v_q_hadamard() {
  const INT_T qvec_A[8] = {16378, 13638, 16378, 9787, 14861, 16378, 10661, 11018};
  const INT_T qvec_B[8] = {178, 1064, -2048, 1718, -1663, 851, 1244, 1282};
  INT_T pred[8];

  #ifdef SHIFT
    const INT_T expected[8] = {1423, 7085, -16378, 8209, -12068, 6805, 6475, 6897};
    v_q_hadamard(&qvec_A[0], &qvec_B[0], 8, &pred[0], 5, 6);
  #else
    const INT_T expected[8] = {1423, 7085, -16378, 8209, -12067, 6805, 6475, 6897};
    v_q_hadamard(&qvec_A[0], &qvec_B[0], 8, &pred[0], 32, 64);
  #endif

  return check_output(pred, expected, 8);
}
@ -87,7 +115,12 @@ int test_v_q_scalar_add() {
const INT_T expected[8] = {16378, 13638, 16378, 9787, 14861, 16378, 10661, 11018};
INT_T pred[8];
v_q_scalar_add(qscalar_A, &qvec_B[0], 8, &pred[0], 256, 1, 1);
#ifdef SHIFT
v_q_scalar_add(qscalar_A, &qvec_B[0], 8, &pred[0], 8, 0, 0);
#else
v_q_scalar_add(qscalar_A, &qvec_B[0], 8, &pred[0], 256, 1, 1);
#endif
return check_output(pred, expected, 8);
}
@ -98,7 +131,28 @@ int test_v_q_scalar_sub() {
const INT_T expected[8] = {16384, 13624, 16384, 9744, 14856, 16384, 10624, 10984};
INT_T pred[8];
v_q_scalar_sub(qscalar_A, &qvec_B[0], 8, &pred[0], 1, 1, 1);
#ifdef SHIFT
v_q_scalar_sub(qscalar_A, &qvec_B[0], 8, &pred[0], 0, 0, 0);
#else
v_q_scalar_sub(qscalar_A, &qvec_B[0], 8, &pred[0], 1, 1, 1);
#endif
return check_output(pred, expected, 8);
}
// Test v_q_sub_scalar() function.
int test_v_q_sub_scalar() {
const INT_T qvec_A[8] = {0, 2760, 0, 6640, 1528, 0, 5760, 5400};
const INT_T qscalar_B = 16384;
const INT_T expected[8] = {-16384, -13624, -16384, -9744, -14856, -16384, -10624, -10984};
INT_T pred[8];
#ifdef SHIFT
v_q_sub_scalar(&qvec_A[0], qscalar_B, 8, &pred[0], 0, 0, 0);
#else
v_q_sub_scalar(&qvec_A[0], qscalar_B, 8, &pred[0], 1, 1, 1);
#endif
return check_output(pred, expected, 8);
}
@ -109,21 +163,245 @@ int test_v_q_scalar_mul() {
const INT_T expected[8] = {16261, 13521, 16261, 9670, 14744, 16261, 10544, 10901};
INT_T pred[8];
v_q_scalar_mul(qscalar_A, &qvec_B[0], 8, &pred[0], 128, 256);
#ifdef SHIFT
v_q_scalar_mul(qscalar_A, &qvec_B[0], 8, &pred[0], 7, 8);
#else
v_q_scalar_mul(qscalar_A, &qvec_B[0], 8, &pred[0], 128, 256);
#endif
return check_output(pred, expected, 8);
}
// Test v_q_argmax() function.
// Returns 0 when the reported index matches the expected one, 1 otherwise.
int test_v_q_argmax() {
  const INT_T qvec_A[8] = {1675, 9870, -9876, -1234, 5674, 28765, 9876, 12654};
  const ITER_T expected = 5;
  ITER_T pred;

  v_q_argmax(&qvec_A[0], 8, &pred);

  // The index is compared as an ITER_T. Routing it through check_output()
  // would require viewing ITER_T storage through an INT_T pointer, which is
  // only valid if the two types happen to be compatible.
  if (pred != expected) {
    printf("ArgMax Output: %lu, Expected: %lu\n", (unsigned long)pred, (unsigned long)expected);
    return 1;
  }
  return 0;
}
// Test v_q_relu() function.
// The input buffer is rectified in place and then compared with the expected
// values, in which every negative entry has become zero.
int test_v_q_relu() {
  enum { COUNT = 16 };
  INT_T data[COUNT] = {-3648, 648, -2147, -2348, 1468, -4348, 3648, 3648, -648, 9648, 3778, 4743, 7483, -243, 8, -21};
  const INT_T want[COUNT] = {0, 648, 0, 0, 1468, 0, 3648, 3648, 0, 9648, 3778, 4743, 7483, 0, 8, 0};

  v_q_relu(data, COUNT);
  return check_output(data, want, COUNT);
}
// Test v_q_exp() function.
// Input and output scale are both 8.
// NOTE(review): several expected values (e.g. the negative ones) look like
// results that did not fit INT_T and wrapped on conversion — confirm that
// this is the intended reference behaviour.
int test_v_q_exp() {
  enum { COUNT = 16 };
  const INT_T input[COUNT] = {13, 54, 34, 35, 87, 11, 41, 93, 89, 11, 90, 25, 76, 39, 48, 93};
  const INT_T want[COUNT] = {40, 6832, 560, 635, 29493, 31, 1345, -22628, 18482, 31, 25215, 182, -24195, 1047, 3227, -22628};
  INT_T got[COUNT];

  v_q_exp(input, COUNT, got, 8, 8);
  return check_output(got, want, COUNT);
}
// Test v_q_scale_up() function.
int test_v_q_scale_up() {
INT_T qvec_A[16] = {423, -987, -2342, 1232, -324, 843, 982, 2342, 343, 654, 987, 654, 567, 2876, 987, 1265};
INT_T expected[16] = {846, -1974, -4684, 2464, -648, 1686, 1964, 4684, 686, 1308, 1974, 1308, 1134, 5752, 1974, 2530};
#ifdef SHIFT
v_q_scale_up(&qvec_A[0], 16, 1);
#else
v_q_scale_up(&qvec_A[0], 16, 2);
#endif
return check_output(qvec_A, expected, 16);
}
// Test v_q_scale_down() function.
int test_v_q_scale_down() {
INT_T qvec_A[16] = {4232, -9879, -2342, 1232, -3242, 8432, 9823, 2342, 343, 6543, 9876, 6542, 5674, 28765, 9876, 12654};
#ifdef SHIFT
const INT_T expected[16] = {2116, -4940, -1171, 616, -1621, 4216, 4911, 1171, 171, 3271, 4938, 3271, 2837, 14382, 4938, 6327};
v_q_scale_down(&qvec_A[0], 16, 1);
#else
const INT_T expected[16] = {2116, -4939, -1171, 616, -1621, 4216, 4911, 1171, 171, 3271, 4938, 3271, 2837, 14382, 4938, 6327};
v_q_scale_down(&qvec_A[0], 16, 2);
#endif
return check_output(qvec_A, expected, 16);
}
// Test m_q_transpose() function.
// Covers a rectangular case (12 elements, called with nrows = 3, ncols = 4)
// and a square 3 x 3 case. Returns 1 if either output mismatches.
int test_m_q_transpose() {
  const INT_T rect_in[4 * 3] = {1238, 5432, 1834, 6543, -5698, -2342, 9876, 5674, 8435, 6542, 7824, 3924};
  const INT_T rect_want[3 * 4] = {1238, 6543, 9876, 6542, 5432, -5698, 5674, 7824, 1834, -2342, 8435, 3924};
  INT_T rect_got[12];

  const INT_T square_in[3 * 3] = {1238, 5432, 1834, 6543, -5698, -2342, 9876, 5674, 8435};
  const INT_T square_want[3 * 3] = {1238, 6543, 9876, 5432, -5698, 5674, 1834, -2342, 8435};
  INT_T square_got[9];

  m_q_transpose(rect_in, 3, 4, rect_got);
  m_q_transpose(square_in, 3, 3, square_got);

  // The square case is only checked when the rectangular one passes.
  if (check_output(rect_got, rect_want, 12)) {
    return 1;
  }
  return check_output(square_got, square_want, 9) ? 1 : 0;
}
// Test m_q_reverse() function.
// Reverses the same 4 x 4 matrix with axis = 1 (each row right-to-left) and
// with axis = 0 (rows last-to-first). Returns 1 if either output mismatches.
int test_m_q_reverse() {
  const INT_T input[4 * 4]= {4232, -9879, -2342, 1232, -3242, 8432, 9823, 2342, 343, 6543, 9876, 6542, 5674, 28765, 9876, 12654};
  const INT_T want_axis1[4 * 4] = {1232, -2342, -9879, 4232, 2342, 9823, 8432, -3242, 6542, 9876, 6543, 343, 12654, 9876, 28765, 5674};
  const INT_T want_axis0[4 * 4] = {5674, 28765, 9876, 12654, 343, 6543, 9876, 6542, -3242, 8432, 9823, 2342, 4232, -9879, -2342, 1232};
  INT_T got_axis1[16];
  INT_T got_axis0[16];

  m_q_reverse(input, 4, 4, 1, got_axis1);
  m_q_reverse(input, 4, 4, 0, got_axis0);

  // The axis-0 result is only checked when the axis-1 result passes.
  if (check_output(got_axis1, want_axis1, 16)) {
    return 1;
  }
  return check_output(got_axis0, want_axis0, 16) ? 1 : 0;
}
// Test m_q_add_vec() function.
int test_m_q_add_vec() {
const INT_T qmat_A[4 * 4] = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934, 6895, 1211, 6790, 5425, 8976, 4539, 9348, 9321};
const INT_T qvec_B[4] = {8452, 2341, 9383, 2353};
const INT_T expected[16] = {2775, 3311, 4072, 2305, 6507, 2290, 5051, 5055, 5560, 1190, 5740, 3300, 6601, 2854, 7019, 5248};
INT_T pred[16];
#ifdef SHIFT
m_q_add_vec(&qmat_A[0], &qvec_B[0], 4, 4, &pred[0], 0, 1, 1);
#else
m_q_add_vec(&qmat_A[0], &qvec_B[0], 4, 4, &pred[0], 1, 2, 2);
#endif
return check_output(pred, expected, 16);
}
// Test m_q_sub_vec() function.
int test_m_q_sub_vec() {
const INT_T qmat_A[4 * 4] = {1324, 5453, 3454, 3435, 8789, 3411, 5412, 8934, 6895, 1211, 6790, 5425, 8976, 4539, 9348, 9321};
const INT_T qvec_B[4] = {8452, 2341, 9383, 2353};
const INT_T expected[16] = {-1451, 2141, -618, 1129, 2281, 1120, 361, 3879, 1334, 20, 1050, 2124, 2375, 1684, 2329, 4072};
INT_T pred[16];
#ifdef SHIFT
m_q_sub_vec(&qmat_A[0], &qvec_B[0], 4, 4, &pred[0], 0, 1, 1);
#else
m_q_sub_vec(&qmat_A[0], &qvec_B[0], 4, 4, &pred[0], 1, 2, 2);
#endif
return check_output(pred, expected, 16);
}
// Test m_q_mulvec() function.
// 8 x 4 matrix times a 4-element vector. Each build variant has its own
// expected output and scale arguments (shift amounts with SHIFT, divisors
// otherwise); H1 = 2 and H2 = 0 in both.
int test_m_q_mulvec() {
  const INT_T qmat_A[8 * 4] = {7069, -10389, 1562, -1992, 3262, -37, -1143, -995, 5513, -17035, -14615, -6636, 4733, -403, 4106, -1104, -2707, -1287, -18128, -1832, -10108, -137, 2064, 1207, 5233, 226, 831, -1909, 4489, -1099, 2845, -1261};
  const INT_T qvec_B[4] = {1040, 1919, 4254, 4024};
  INT_T pred[8];

  #ifdef SHIFT
    const INT_T expected[8] = {-426, -170, -3535, 524, -2740, 87, 52, 292};
    m_q_mulvec(&qmat_A[0], &qvec_B[0], 8, 4, &pred[0], 7, 6, 2, 0);
  #else
    const INT_T expected[8] = {-425, -169, -3534, 524, -2739, 87, 52, 292};
    m_q_mulvec(&qmat_A[0], &qvec_B[0], 8, 4, &pred[0], 128, 64, 2, 0);
  #endif

  return check_output(pred, expected, 8);
}
// Test m_q_sparse_mulvec() function.
// qmat_A holds the zero-terminated index runs and qmat_B the matching
// non-zero values. The scales are (0, 1, 2) as shift amounts with SHIFT and
// (1, 2, 4) as divisors otherwise; both builds share one expected output.
int test_m_q_sparse_mulvec() {
  const ITER_T qmat_A[16] = {1, 2, 3, 4, 5, 6, 7, 0, 2, 4, 6, 8, 10, 12, 14, 0};
  const INT_T qmat_B[14] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140};
  const INT_T qvec_C[2] = {1, 2};
  const INT_T expected[14] = {1, 22, 3, 27, 6, 32, 8, 27, 0, 30, 0, 32, 0, 35};
  // The output must start at zero because m_q_sparse_mulvec() accumulates
  // into it. `{0}` is used because an empty `{}` initializer is not valid C11.
  INT_T pred[14] = {0};

  #ifdef SHIFT
    m_q_sparse_mulvec(&qmat_A[0], &qmat_B[0], &qvec_C[0], 2, &pred[0], 0, 1, 2);
  #else
    m_q_sparse_mulvec(&qmat_A[0], &qmat_B[0], &qvec_C[0], 2, &pred[0], 1, 2, 4);
  #endif

  return check_output(pred, expected, 14);
}
// Test t_q_add_vec() function.
int test_t_q_add_vec() {
const INT_T qmat_A[2 * 2 * 2 * 2] = {1324, 5453,
3454, 3435,
8789, 3411,
5412, 8934,
6895, 1211,
6790, 5425,
8976, 4539,
9348, 9321};
const INT_T qvec_B[2] = {8452, 2341};
const INT_T expected[2 * 2 * 2 * 2] = {2775, 3311,
3840, 2302,
6507, 2290,
4819, 5052,
5560, 1190,
5508, 3297,
6601, 2854,
6787, 5245};
INT_T pred[16];
#ifdef SHIFT
t_q_add_vec(&qmat_A[0], &qvec_B[0], 2, 2, 2, 2, &pred[0], 0, 1, 1);
#else
t_q_add_vec(&qmat_A[0], &qvec_B[0], 2, 2, 2, 2, &pred[0], 1, 2, 2);
#endif
return check_output(pred, expected, 16);
}
// Test t_q_sub_vec() function.
int test_t_q_sub_vec() {
const INT_T qmat_A[2 * 2 * 2 * 2] = {1324, 5453,
3454, 3435,
8789, 3411,
5412, 8934,
6895, 1211,
6790, 5425,
8976, 4539,
9348, 9321};
const INT_T qvec_B[2] = {8452, 2341};
const INT_T expected[2 * 2 * 2 * 2] = {-1451, 2141,
-386, 1132,
2281, 1120,
593, 3882,
1334, 20,
1282, 2127,
2375, 1684,
2561, 4075};
INT_T pred[16];
#ifdef SHIFT
t_q_sub_vec(&qmat_A[0], &qvec_B[0], 2, 2, 2, 2, &pred[0], 0, 1, 1);
#else
t_q_sub_vec(&qmat_A[0], &qvec_B[0], 2, 2, 2, 2, &pred[0], 1, 2, 2);
#endif
return check_output(pred, expected, 16);
}
int main() {
if (test_v_q_treesum()) {
printf("Test Failure for v_q_treesum()!\n");
@ -141,10 +419,36 @@ int main() {
printf("Test Failure for v_q_scalar_add()!\n");
} else if (test_v_q_scalar_sub()) {
printf("Test Failure for v_q_scalar_sub()!\n");
} else if (test_v_q_sub_scalar()) {
printf("Test Failure for v_q_sub_scalar()!\n");
} else if (test_v_q_scalar_mul()) {
printf("Test Failure for v_q_scalar_mul()!\n");
} else if (test_v_q_argmax()) {
printf("Test Failure for v_q_argmax()!\n");
} else if (test_v_q_relu()) {
printf("Test Failure for v_q_relu()!\n");
} else if (test_v_q_exp()) {
printf("Test Failure for v_q_exp()!\n");
} else if (test_v_q_scale_up()) {
printf("Test Failure for v_q_scale_up()!\n");
} else if (test_v_q_scale_down()) {
printf("Test Failure for v_q_scale_down()!\n");
} else if (test_m_q_transpose()) {
printf("Test Failure for m_q_transpose()!\n");
} else if (test_m_q_reverse()) {
printf("Test Failure for m_q_reverse()!\n");
} else if (test_m_q_add_vec()) {
printf("Test Failure for m_q_add_vec()!\n");
} else if (test_m_q_sub_vec()) {
printf("Test Failure for m_q_sub_vec()!\n");
} else if (test_m_q_mulvec()) {
printf("Test Failure for m_q_mulvec()!\n");
} else if (test_m_q_sparse_mulvec()) {
printf("Test Failure for m_q_sparse_mulvec()!\n");
} else if (test_t_q_add_vec()) {
printf("Test Failure for t_q_add_vec()!\n");
} else if (test_t_q_sub_vec()) {
printf("Test Failure for t_q_sub_vec()!\n");
} else {
printf("All Tests Passed!\n");
return 0;