зеркало из https://github.com/electron/electron.git
3108669: arm,dsp: Fix 8bpp Dct64_NEON().
https://chromium-review.googlesource.com/c/codecs/libgav1/+/3108669
This commit is contained in:
Родитель
4cfc7299a5
Коммит
7ed3132312
|
@@ -13,5 +13,7 @@
|
|||
|
||||
"src/electron/patches/Mantle": "src/third_party/squirrel.mac/vendor/Mantle",
|
||||
|
||||
"src/electron/patches/ReactiveObjC": "src/third_party/squirrel.mac/vendor/ReactiveObjC"
|
||||
"src/electron/patches/ReactiveObjC": "src/third_party/squirrel.mac/vendor/ReactiveObjC",
|
||||
|
||||
"src/electron/patches/libgav1": "src/third_party/libgav1/src"
|
||||
}
|
||||
|
|
|
@@ -0,0 +1 @@
|
|||
arm_dsp_fix_8bpp_dct64_neon.patch
|
|
@@ -0,0 +1,98 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Scott LaVarnway <slavarnway@google.com>
|
||||
Date: Sat, 14 Aug 2021 09:35:39 -0700
|
||||
Subject: arm,dsp: Fix 8bpp Dct64_NEON().
|
||||
|
||||
When building with clang (-march=armv8.2), an "off by one" error was detected. For now,
|
||||
disable the fast butterfly.
|
||||
|
||||
PiperOrigin-RevId: 390805568
|
||||
Change-Id: I9749a665b8235ec0f0dcbca46b284223c95af2a9
|
||||
|
||||
diff --git a/src/dsp/arm/inverse_transform_neon.cc b/src/dsp/arm/inverse_transform_neon.cc
|
||||
index 4af8cf7ff68810016cc777719f84888fe00abb1f..0371cd54b3d0f64040a67d8c776270637137134b 100644
|
||||
--- a/src/dsp/arm/inverse_transform_neon.cc
|
||||
+++ b/src/dsp/arm/inverse_transform_neon.cc
|
||||
@@ -388,6 +388,33 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a,
|
||||
int16x8_t* b,
|
||||
const int angle,
|
||||
const bool flip) {
|
||||
+#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \
|
||||
+ defined(__clang__) // ARM v8.1-A
|
||||
+ // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into
|
||||
+ // vqrdmlshq_s16 resulting in an "off by one" error. For now, do not use
|
||||
+ // vqrdmulhq_n_s16().
|
||||
+ const int16_t cos128 = Cos128(angle);
|
||||
+ const int16_t sin128 = Sin128(angle);
|
||||
+ const int32x4_t x0 = vmull_n_s16(vget_low_s16(*b), -sin128);
|
||||
+ const int32x4_t y0 = vmull_n_s16(vget_low_s16(*b), cos128);
|
||||
+ const int16x4_t x1 = vqrshrn_n_s32(x0, 12);
|
||||
+ const int16x4_t y1 = vqrshrn_n_s32(y0, 12);
|
||||
+
|
||||
+ const int32x4_t x0_hi = vmull_n_s16(vget_high_s16(*b), -sin128);
|
||||
+ const int32x4_t y0_hi = vmull_n_s16(vget_high_s16(*b), cos128);
|
||||
+ const int16x4_t x1_hi = vqrshrn_n_s32(x0_hi, 12);
|
||||
+ const int16x4_t y1_hi = vqrshrn_n_s32(y0_hi, 12);
|
||||
+
|
||||
+ const int16x8_t x = vcombine_s16(x1, x1_hi);
|
||||
+ const int16x8_t y = vcombine_s16(y1, y1_hi);
|
||||
+ if (flip) {
|
||||
+ *a = y;
|
||||
+ *b = x;
|
||||
+ } else {
|
||||
+ *a = x;
|
||||
+ *b = y;
|
||||
+ }
|
||||
+#else
|
||||
const int16_t cos128 = Cos128(angle);
|
||||
const int16_t sin128 = Sin128(angle);
|
||||
// For this function, the max value returned by Sin128() is 4091, which fits
|
||||
@@ -403,12 +430,40 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a,
|
||||
*a = x;
|
||||
*b = y;
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
LIBGAV1_ALWAYS_INLINE void ButterflyRotation_SecondIsZero(int16x8_t* a,
|
||||
int16x8_t* b,
|
||||
const int angle,
|
||||
const bool flip) {
|
||||
+#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \
|
||||
+ defined(__clang__) // ARM v8.1-A
|
||||
+ // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into
|
||||
+ // vqrdmlshq_s16 resulting in an "off by one" error. For now, do not use
|
||||
+ // vqrdmulhq_n_s16().
|
||||
+ const int16_t cos128 = Cos128(angle);
|
||||
+ const int16_t sin128 = Sin128(angle);
|
||||
+ const int32x4_t x0 = vmull_n_s16(vget_low_s16(*a), cos128);
|
||||
+ const int32x4_t y0 = vmull_n_s16(vget_low_s16(*a), sin128);
|
||||
+ const int16x4_t x1 = vqrshrn_n_s32(x0, 12);
|
||||
+ const int16x4_t y1 = vqrshrn_n_s32(y0, 12);
|
||||
+
|
||||
+ const int32x4_t x0_hi = vmull_n_s16(vget_high_s16(*a), cos128);
|
||||
+ const int32x4_t y0_hi = vmull_n_s16(vget_high_s16(*a), sin128);
|
||||
+ const int16x4_t x1_hi = vqrshrn_n_s32(x0_hi, 12);
|
||||
+ const int16x4_t y1_hi = vqrshrn_n_s32(y0_hi, 12);
|
||||
+
|
||||
+ const int16x8_t x = vcombine_s16(x1, x1_hi);
|
||||
+ const int16x8_t y = vcombine_s16(y1, y1_hi);
|
||||
+ if (flip) {
|
||||
+ *a = y;
|
||||
+ *b = x;
|
||||
+ } else {
|
||||
+ *a = x;
|
||||
+ *b = y;
|
||||
+ }
|
||||
+#else
|
||||
const int16_t cos128 = Cos128(angle);
|
||||
const int16_t sin128 = Sin128(angle);
|
||||
const int16x8_t x = vqrdmulhq_n_s16(*a, cos128 << 3);
|
||||
@@ -420,6 +475,7 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_SecondIsZero(int16x8_t* a,
|
||||
*a = x;
|
||||
*b = y;
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
LIBGAV1_ALWAYS_INLINE void HadamardRotation(int16x8_t* a, int16x8_t* b,
|
Загрузка…
Ссылка в новой задаче