Merge "Add vp10_fwd_txfm2d_sse2" into nextgenv2
Commit 0a9eedfbef
test/test.mk:

@@ -174,6 +174,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += subtract_test.cc
 
+LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp10_fwd_txfm2d_sse2_test.cc
+
 ifeq ($(CONFIG_EXT_INTER),yes)
 LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc
 LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc
test/vp10_fwd_txfm2d_sse2_test.cc (new file):

@@ -0,0 +1,71 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./vp10_rtcd.h"
+#include "test/acm_random.h"
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+#if CONFIG_VP9_HIGHBITDEPTH
+TEST(vp10_fwd_txfm2d_sse2, accuracy) {
+  int16_t input[4096] = {0};
+  int32_t output_sse2[4096] = {0};
+  int32_t output_c[4096] = {0};
+
+  int txfm_num = 17;
+
+  TXFM_2D_CFG cfg_list[] = {
+      fwd_txfm_2d_cfg_dct_dct_4,   fwd_txfm_2d_cfg_dct_dct_8,
+      fwd_txfm_2d_cfg_dct_dct_16,  fwd_txfm_2d_cfg_dct_dct_32,
+      fwd_txfm_2d_cfg_dct_dct_64,  fwd_txfm_2d_cfg_dct_adst_4,
+      fwd_txfm_2d_cfg_dct_adst_8,  fwd_txfm_2d_cfg_dct_adst_16,
+      fwd_txfm_2d_cfg_dct_adst_32, fwd_txfm_2d_cfg_adst_dct_4,
+      fwd_txfm_2d_cfg_adst_dct_8,  fwd_txfm_2d_cfg_adst_dct_16,
+      fwd_txfm_2d_cfg_adst_dct_32, fwd_txfm_2d_cfg_adst_adst_4,
+      fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_adst_16,
+      fwd_txfm_2d_cfg_adst_adst_32,
+  };
+
+  Fwd_Txfm2d_Func txfm2d_func_c_list[] = {
+      vp10_fwd_txfm2d_4x4_c,   vp10_fwd_txfm2d_8x8_c, vp10_fwd_txfm2d_16x16_c,
+      vp10_fwd_txfm2d_32x32_c, vp10_fwd_txfm2d_64x64_c,
+  };
+
+  Fwd_Txfm2d_Func txfm2d_func_sse2_list[] = {
+      vp10_fwd_txfm2d_4x4_sse2,   vp10_fwd_txfm2d_8x8_sse2,
+      vp10_fwd_txfm2d_16x16_sse2, vp10_fwd_txfm2d_32x32_sse2,
+      vp10_fwd_txfm2d_64x64_sse2,
+  };
+
+  for (int i = 0; i < txfm_num; i++) {
+    TXFM_2D_CFG cfg = cfg_list[i];
+    int txfm_size = cfg.txfm_size;
+    int func_idx = get_max_bit(txfm_size) - 2;
+    Fwd_Txfm2d_Func txfm2d_func_c = txfm2d_func_c_list[func_idx];
+    Fwd_Txfm2d_Func txfm2d_func_sse2 = txfm2d_func_sse2_list[func_idx];
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    // init input
+    for (int r = 0; r < txfm_size; r++) {
+      for (int c = 0; c < txfm_size; c++) {
+        input[r * txfm_size + c] = rnd.Rand16() % base;
+      }
+    }
+
+    txfm2d_func_c(input, output_c, cfg.txfm_size, &cfg, 10);
+    txfm2d_func_sse2(input, output_sse2, cfg.txfm_size, &cfg, 10);
+    for (int r = 0; r < txfm_size; r++) {
+      for (int c = 0; c < txfm_size; c++) {
+        EXPECT_EQ(output_c[r * txfm_size + c], output_sse2[r * txfm_size + c]);
+      }
+    }
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}  // anonymous namespace
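
Note: `base` bounds the random inputs and is not defined in this file; it presumably comes in through the included test/vp10_txfm_test.h. The function tables are indexed by transform size via get_max_bit(txfm_size) - 2, which maps sizes 4/8/16/32/64 to indices 0..4. A minimal scalar sketch of what that helper has to compute (an illustration, not the upstream source; the real helper ships with the vp10_txfm headers):

    // Index of the highest set bit, e.g. get_max_bit(4) == 2,
    // so get_max_bit(txfm_size) - 2 runs 0..4 for sizes 4..64.
    static int get_max_bit(int x) {
      int max_bit = -1;
      while (x) {
        x >>= 1;
        max_bit++;
      }
      return max_bit;
    }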
vp10/common/vp10_rtcd_defs.pl:

@@ -615,15 +615,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   #fwd txfm
   add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
-  specialize qw/vp10_fwd_txfm2d_4x4/;
+  specialize qw/vp10_fwd_txfm2d_4x4 sse2/;
   add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
-  specialize qw/vp10_fwd_txfm2d_8x8/;
+  specialize qw/vp10_fwd_txfm2d_8x8 sse2/;
   add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
-  specialize qw/vp10_fwd_txfm2d_16x16/;
+  specialize qw/vp10_fwd_txfm2d_16x16 sse2/;
   add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
-  specialize qw/vp10_fwd_txfm2d_32x32/;
+  specialize qw/vp10_fwd_txfm2d_32x32 sse2/;
   add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
-  specialize qw/vp10_fwd_txfm2d_64x64/;
+  specialize qw/vp10_fwd_txfm2d_64x64 sse2/;
 
   #inv txfm
   add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
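
Note: each `specialize qw/... sse2/;` line tells the RTCD (run-time CPU detection) generator to route the corresponding entry point to the _sse2 variant on SSE2-capable CPUs, with the _c version as the fallback. The generated glue behaves roughly like this sketch (hypothetical shape, not the literal contents of the generated vp10_rtcd.h):

    // Sketch: the function pointer defaults to the C version ...
    void (*vp10_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output,
                                const int stride, const TXFM_2D_CFG *cfg,
                                const int bd) = vp10_fwd_txfm2d_4x4_c;

    // ... and is re-pointed during setup when SSE2 is detected.
    static void setup_rtcd_internal(int flags) {
      if (flags & HAS_SSE2) vp10_fwd_txfm2d_4x4 = vp10_fwd_txfm2d_4x4_sse2;
    }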

(File diff suppressed because it is too large.)
vp10/common/x86/vp10_fwd_txfm2d_sse2.c (new file):

@@ -0,0 +1,117 @@
+#include "vp10/common/x86/vp10_txfm1d_sse2.h"
+
+static inline void int16_array_with_stride_to_int32_array_without_stride(
+    const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
+  int r, c;
+  for (r = 0; r < txfm1d_size; r++) {
+    for (c = 0; c < txfm1d_size; c++) {
+      output[r * txfm1d_size + c] = (int32_t)input[r * stride + c];
+    }
+  }
+}
+
+typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range);
+
+static inline TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {
+    case TXFM_TYPE_DCT4:
+      return vp10_fdct4_new_sse2;
+      break;
+    case TXFM_TYPE_DCT8:
+      return vp10_fdct8_new_sse2;
+      break;
+    case TXFM_TYPE_DCT16:
+      return vp10_fdct16_new_sse2;
+      break;
+    case TXFM_TYPE_DCT32:
+      return vp10_fdct32_new_sse2;
+      break;
+    case TXFM_TYPE_DCT64:
+      return vp10_fdct64_new_sse2;
+      break;
+    case TXFM_TYPE_ADST4:
+      return vp10_fadst4_new_sse2;
+      break;
+    case TXFM_TYPE_ADST8:
+      return vp10_fadst8_new_sse2;
+      break;
+    case TXFM_TYPE_ADST16:
+      return vp10_fadst16_new_sse2;
+      break;
+    case TXFM_TYPE_ADST32:
+      return vp10_fadst32_new_sse2;
+      break;
+    default:
+      assert(0);
+  }
+  return NULL;
+}
+
+static inline void fwd_txfm2d_sse2(const int16_t *input, int32_t *output,
+                                   const int stride, const TXFM_2D_CFG *cfg,
+                                   int32_t *txfm_buf) {
+  const int txfm_size = cfg->txfm_size;
+  const int8_t *shift = cfg->shift;
+  const int8_t *stage_range_col = cfg->stage_range_col;
+  const int8_t *stage_range_row = cfg->stage_range_row;
+  const int8_t *cos_bit_col = cfg->cos_bit_col;
+  const int8_t *cos_bit_row = cfg->cos_bit_row;
+  const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
+  const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
+
+  __m128i *buf_128 = (__m128i *)txfm_buf;
+  __m128i *out_128 = (__m128i *)output;
+  int num_per_128 = 4;
+  int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
+
+  int16_array_with_stride_to_int32_array_without_stride(input, stride,
+                                                        txfm_buf, txfm_size);
+  round_shift_array_32_sse2(buf_128, out_128, txfm2d_size_128, -shift[0]);
+  txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
+  round_shift_array_32_sse2(buf_128, out_128, txfm2d_size_128, -shift[1]);
+  transpose_32(txfm_size, out_128, buf_128);
+  txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row);
+  round_shift_array_32_sse2(out_128, buf_128, txfm2d_size_128, -shift[2]);
+  transpose_32(txfm_size, buf_128, out_128);
+}
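
Note: the 2D transform ping-pongs between txfm_buf and output, both viewed as __m128i arrays of four int32 lanes: scale by shift[0], column transform, scale by shift[1], transpose, row transform, scale by shift[2], transpose back into output. The shifts are negated because round_shift_array_32_sse2 (defined in vp10_txfm1d_sse2.h below) rounds and shifts right for a positive bit argument and shifts left otherwise. Both buffers must hold txfm_size * txfm_size int32 values, which is why each wrapper below sizes txfm_buf to the squared transform size.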
+
+void vp10_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output,
+                              const int stride, const TXFM_2D_CFG *cfg,
+                              const int bd) {
+  int32_t txfm_buf[16];
+  (void)bd;
+  fwd_txfm2d_sse2(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output,
+                              const int stride, const TXFM_2D_CFG *cfg,
+                              const int bd) {
+  int32_t txfm_buf[64];
+  (void)bd;
+  fwd_txfm2d_sse2(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output,
+                                const int stride, const TXFM_2D_CFG *cfg,
+                                const int bd) {
+  int32_t txfm_buf[256];
+  (void)bd;
+  fwd_txfm2d_sse2(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output,
+                                const int stride, const TXFM_2D_CFG *cfg,
+                                const int bd) {
+  int32_t txfm_buf[1024];
+  (void)bd;
+  fwd_txfm2d_sse2(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_64x64_sse2(const int16_t *input, int32_t *output,
+                                const int stride, const TXFM_2D_CFG *cfg,
+                                const int bd) {
+  int32_t txfm_buf[4096];
+  (void)bd;
+  fwd_txfm2d_sse2(input, output, stride, cfg, txfm_buf);
+}
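
Since txfm_buf and output are accessed through __m128i pointers, the int32_t buffers effectively need 16-byte alignment. A hypothetical call (DECLARE_ALIGNED is libvpx's alignment macro from vpx_ports/mem.h; treat the surrounding setup as an assumption):

    DECLARE_ALIGNED(16, int16_t, src[4 * 4]);
    DECLARE_ALIGNED(16, int32_t, coeff[4 * 4]);
    TXFM_2D_CFG cfg = fwd_txfm_2d_cfg_dct_dct_4;
    /* ... fill src ... */
    vp10_fwd_txfm2d_4x4_sse2(src, coeff, 4 /* stride */, &cfg, 10 /* bd, unused */);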
vp10/common/x86/vp10_txfm1d_sse2.h (new file):

@@ -0,0 +1,165 @@
+#ifndef VP10_TXMF1D_SSE2_H_
+#define VP10_TXMF1D_SSE2_H_
+
+#include <emmintrin.h>
+#include "vp10/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_fdct4_new_sse2(const __m128i* input, __m128i* output,
+                         const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_fdct8_new_sse2(const __m128i* input, __m128i* output,
+                         const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_fdct16_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_fdct32_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_fdct64_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+
+void vp10_fadst4_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_fadst8_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_fadst16_new_sse2(const __m128i* input, __m128i* output,
+                           const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_fadst32_new_sse2(const __m128i* input, __m128i* output,
+                           const int8_t* cos_bit, const int8_t* stage_range);
+
+void vp10_idct4_new_sse2(const __m128i* input, __m128i* output,
+                         const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_idct8_new_sse2(const __m128i* input, __m128i* output,
+                         const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_idct16_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_idct32_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_idct64_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+
+void vp10_iadst4_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_iadst8_new_sse2(const __m128i* input, __m128i* output,
+                          const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_iadst16_new_sse2(const __m128i* input, __m128i* output,
+                           const int8_t* cos_bit, const int8_t* stage_range);
+void vp10_iadst32_new_sse2(const __m128i* input, __m128i* output,
+                           const int8_t* cos_bit, const int8_t* stage_range);
+
+static INLINE void transpose_32_4x4(int stride, const __m128i* input,
+                                    __m128i* output) {
+  __m128i temp0 = _mm_unpacklo_epi32(input[0 * stride], input[2 * stride]);
+  __m128i temp1 = _mm_unpackhi_epi32(input[0 * stride], input[2 * stride]);
+  __m128i temp2 = _mm_unpacklo_epi32(input[1 * stride], input[3 * stride]);
+  __m128i temp3 = _mm_unpackhi_epi32(input[1 * stride], input[3 * stride]);
+
+  output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2);
+  output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2);
+  output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3);
+  output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3);
+}
+
+// the entire input block can be represented by a grid of 4x4 blocks
+// each 4x4 block can be represented by 4 vertical __m128i
+// we first transpose each 4x4 block internally
+// then transpose the grid
+static INLINE void transpose_32(int txfm_size, const __m128i* input,
+                                __m128i* output) {
+  const int num_per_128 = 4;
+  const int row_size = txfm_size;
+  const int col_size = txfm_size / num_per_128;
+  int r, c;
+
+  // transpose each 4x4 block internally
+  for (r = 0; r < row_size; r += 4) {
+    for (c = 0; c < col_size; c++) {
+      transpose_32_4x4(col_size, &input[r * col_size + c],
+                       &output[c * 4 * col_size + r / 4]);
+    }
+  }
+}
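
Note: transpose_32 treats the txfm_size x txfm_size int32 matrix as a grid of 4x4 tiles (each tile held in four __m128i), transposes every tile in registers with transpose_32_4x4, and swaps tile coordinates through the output indexing. The net effect matches this scalar reference (illustrative):

    // Scalar reference: out[c][r] = in[r][c] for an n x n int32_t matrix.
    static void transpose_ref(int n, const int32_t *in, int32_t *out) {
      int r, c;
      for (r = 0; r < n; r++)
        for (c = 0; c < n; c++)
          out[c * n + r] = in[r * n + c];
    }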
+
+#define mullo_epi32(a, b)                                                     \
+  ({                                                                          \
+    __m128i tmp1 = _mm_mul_epu32(a, b);                                       \
+    __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); \
+    _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)),      \
+                       _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0)));     \
+  })
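
Note: _mm_mullo_epi32 is SSE4.1, so this macro rebuilds a lane-wise 32-bit multiply from the SSE2 _mm_mul_epu32, which multiplies only lanes 0 and 2 (unsigned) into 64-bit results; the _mm_srli_si128 copies push lanes 1 and 3 through the same path, and the shuffle/unpack pair reassembles the four low 32-bit halves. Keeping only the low half is what makes the unsigned multiply valid for the signed weights:

    /* The low 32 bits of a 32x32-bit product do not depend on signedness: */
    int32_t a = -3, w = 7;
    uint32_t lo = (uint32_t)a * (uint32_t)w; /* 0xFFFFFFEB */
    assert((int32_t)lo == -21);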
+
+#define round_shift_32_simple_sse2(input, bit)             \
+  ({                                                       \
+    __m128i round = _mm_set1_epi32((1 << (bit - 1)) - 1);  \
+    __m128i tmp1 = _mm_add_epi32(input, round);            \
+    _mm_srai_epi32(tmp1, bit);                             \
+  })
+
+#define round_shift_32_sse2(vec, bit)                      \
+  ({                                                       \
+    __m128i sign, tmp, round;                              \
+    sign = _mm_srai_epi32(vec, 31);                        \
+    tmp = _mm_add_epi32(vec, sign);                        \
+    tmp = _mm_xor_si128(tmp, sign);                        \
+    round = _mm_set1_epi32((1 << (bit - 1)) - 1);          \
+    tmp = _mm_add_epi32(tmp, round);                       \
+    tmp = _mm_srli_epi32(tmp, bit);                        \
+    tmp = _mm_xor_si128(tmp, sign);                        \
+    _mm_sub_epi32(tmp, sign);                              \
+  })
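
Note: the sign/xor sequence rounds the magnitude and then restores the sign, i.e. a nearest-integer right shift with ties rounded toward zero. A scalar equivalent (illustrative):

    static int32_t round_shift_32_ref(int32_t x, int bit) {
      const int32_t r = ((x < 0 ? -x : x) + (1 << (bit - 1)) - 1) >> bit;
      return x < 0 ? -r : r;
    }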
+
+#define round_shift_array_32_sse2(input, output, size, bit) \
+  ({                                                        \
+    if (bit > 0) {                                          \
+      int i;                                                \
+      for (i = 0; i < size; i++) {                          \
+        output[i] = round_shift_32_sse2(input[i], bit);     \
+      }                                                     \
+    } else {                                                \
+      int i;                                                \
+      for (i = 0; i < size; i++) {                          \
+        output[i] = _mm_slli_epi32(input[i], -bit);         \
+      }                                                     \
+    }                                                       \
+  })
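
Note: the callers pass -shift[i], so a positive bit here performs the rounded right shift while a non-positive bit falls through to a plain left shift by -bit. Also, the ({ ... }) bodies throughout this header are GNU statement expressions, a GCC/Clang extension rather than standard C, which is what lets these macros be used like value-returning functions.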
+
+// out0 = in0*w0 + in1*w1
+// out1 = -in1*w0 + in0*w1
+#define btf_32_sse2_type0(w0, w1, in0, in1, out0, out1, bit) \
+  ({                                                         \
+    __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0;        \
+    ww0 = _mm_set1_epi32(w0);                                \
+    ww1 = _mm_set1_epi32(w1);                                \
+    in0_w0 = mullo_epi32(in0, ww0);                          \
+    in1_w1 = mullo_epi32(in1, ww1);                          \
+    out0 = _mm_add_epi32(in0_w0, in1_w1);                    \
+    out0 = round_shift_32_sse2(out0, bit);                   \
+    in0_w1 = mullo_epi32(in0, ww1);                          \
+    in1_w0 = mullo_epi32(in1, ww0);                          \
+    out1 = _mm_sub_epi32(in0_w1, in1_w0);                    \
+    out1 = round_shift_32_sse2(out1, bit);                   \
+  })
+
+// out0 = in0*w0 + in1*w1
+// out1 = in1*w0 - in0*w1
+#define btf_32_sse2_type1(w0, w1, in0, in1, out0, out1, bit) \
+  ({                                                         \
+    __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0;        \
+    ww0 = _mm_set1_epi32(w0);                                \
+    ww1 = _mm_set1_epi32(w1);                                \
+    in0_w0 = mullo_epi32(in0, ww0);                          \
+    in1_w1 = mullo_epi32(in1, ww1);                          \
+    out0 = _mm_add_epi32(in0_w0, in1_w1);                    \
+    out0 = round_shift_32_sse2(out0, bit);                   \
+    in0_w1 = mullo_epi32(in0, ww1);                          \
+    in1_w0 = mullo_epi32(in1, ww0);                          \
+    out1 = _mm_sub_epi32(in1_w0, in0_w1);                    \
+    out1 = round_shift_32_sse2(out1, bit);                   \
+  })
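
Note: per 32-bit lane, the two butterflies compute the same pair of weighted sums with opposite sign conventions for the second output (scalar view, illustrative):

    /* type0: out0 = round_shift(in0 * w0 + in1 * w1, bit);
     *        out1 = round_shift(in0 * w1 - in1 * w0, bit);
     * type1: out0 = round_shift(in0 * w0 + in1 * w1, bit);
     *        out1 = round_shift(in1 * w0 - in0 * w1, bit); */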
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // VP10_TXMF1D_SSE2_H_
vp10/vp10_common.mk:

@@ -110,6 +110,10 @@ VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
 VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c
 VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h
 VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_impl_sse2.h
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_txfm1d_sse2.h
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm1d_sse2.h
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm1d_sse2.c
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm2d_sse2.c
 
 ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht4x4_add_neon.c