Bug 1860626 - Update dav1d to fd4ecc2fd870fa267e1995600dddf212c6e49300 r=chunmin

Differential Revision: https://phabricator.services.mozilla.com/D191676
This commit is contained in:
Updatebot 2023-10-26 04:49:31 +00:00
Родитель 41ede2b8ea
Коммит abc43ce9c7
15 изменённых файлов: 1030 добавлений и 47 удалений

Просмотреть файл

@ -20,11 +20,11 @@ origin:
# Human-readable identifier for this version/release
# Generally "version NNN", "tag SSS", "bookmark SSS"
release: e58afe4dd9057591882a01c31382c203e8a61c92 (2023-07-25T16:10:07.000+02:00).
release: fd4ecc2fd870fa267e1995600dddf212c6e49300 (2023-10-19T17:00:20.000+02:00).
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
revision: e58afe4dd9057591882a01c31382c203e8a61c92
revision: fd4ecc2fd870fa267e1995600dddf212c6e49300
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/

Просмотреть файл

@ -1,2 +1,2 @@
/* auto-generated, do not edit */
#define DAV1D_VERSION "e58afe4dd9057591882a01c31382c203e8a61c92"
#define DAV1D_VERSION "fd4ecc2fd870fa267e1995600dddf212c6e49300"

13
third_party/dav1d/NEWS поставляемый
Просмотреть файл

@ -1,3 +1,16 @@
Changes for 1.3.0 'Tundra Peregrine Falcon (Calidus)':
------------------------------------------------------
1.3.0 is a medium release of dav1d, focused on new APIs and memory usage reduction.
- Reduce memory usage in numerous places
- ABI break in Dav1dSequenceHeader, Dav1dFrameHeader, Dav1dContentLightLevel structures
- New API function to check the API version: dav1d_version_api()
- Rewrite of the SGR functions for ARM64 to be faster
- NEON implementation of save_tmvs for ARM32 and ARM64
- x86 palette DSP for pal_idx_finish function
Changes for 1.2.1 'Arctic Peregrine Falcon':
-------------------------------------------

2
third_party/dav1d/meson.build поставляемый
Просмотреть файл

@ -23,7 +23,7 @@
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
project('dav1d', ['c'],
version: '1.2.1',
version: '1.3.0',
default_options: ['c_std=c99',
'warning_level=2',
'buildtype=release',

4
third_party/dav1d/src/arm/64/ipred.S поставляемый
Просмотреть файл

@ -3529,6 +3529,7 @@ function ipred_z3_fill_padding_neon, export=0
br x9
2:
AARCH64_VALID_JUMP_TARGET
st1 {v31.h}[0], [x0], x1
subs w4, w4, #4
st1 {v31.h}[0], [x13], x1
@ -3547,6 +3548,7 @@ function ipred_z3_fill_padding_neon, export=0
b 1b
4:
AARCH64_VALID_JUMP_TARGET
st1 {v31.s}[0], [x0], x1
subs w4, w4, #4
st1 {v31.s}[0], [x13], x1
@ -3565,6 +3567,7 @@ function ipred_z3_fill_padding_neon, export=0
b 1b
8:
AARCH64_VALID_JUMP_TARGET
st1 {v31.8b}, [x0], x1
subs w4, w4, #4
st1 {v31.8b}, [x13], x1
@ -3585,6 +3588,7 @@ function ipred_z3_fill_padding_neon, export=0
16:
32:
64:
AARCH64_VALID_JUMP_TARGET
st1 {v31.16b}, [x0], x1
subs w4, w4, #4
st1 {v31.16b}, [x13], x1

3
third_party/dav1d/src/arm/64/ipred16.S поставляемый
Просмотреть файл

@ -3664,6 +3664,7 @@ function ipred_z3_fill_padding_neon, export=0
br x9
2:
AARCH64_VALID_JUMP_TARGET
st1 {v31.s}[0], [x0], x1
subs w4, w4, #4
st1 {v31.s}[0], [x13], x1
@ -3682,6 +3683,7 @@ function ipred_z3_fill_padding_neon, export=0
b 1b
4:
AARCH64_VALID_JUMP_TARGET
st1 {v31.4h}, [x0], x1
subs w4, w4, #4
st1 {v31.4h}, [x13], x1
@ -3703,6 +3705,7 @@ function ipred_z3_fill_padding_neon, export=0
16:
32:
64:
AARCH64_VALID_JUMP_TARGET
st1 {v31.8h}, [x0], x1
subs w4, w4, #4
st1 {v31.8h}, [x13], x1

10
third_party/dav1d/src/arm/64/refmvs.S поставляемый
Просмотреть файл

@ -202,27 +202,27 @@ function save_tmvs_neon, export=1
ret
10:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
add x16, x3, #4
st1 {v0.s}[0], [x3]
st1 {v0.b}[4], [x16]
add x3, x3, #5
ret
20:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
add x16, x3, #8
st1 {v0.d}[0], [x3]
st1 {v0.h}[4], [x16]
add x3, x3, #2*5
ret
40:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
st1 {v0.16b}, [x3]
str s1, [x3, #16]
add x3, x3, #4*5
ret
80:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
// This writes 6 full entries plus 2 extra bytes
st1 {v0.16b, v1.16b}, [x3]
// Write the last few, overlapping with the first write.
@ -230,7 +230,7 @@ function save_tmvs_neon, export=1
add x3, x3, #8*5
ret
160:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
add x16, x3, #6*5
add x17, x3, #12*5
// This writes 6 full entries plus 2 extra bytes

2
third_party/dav1d/src/mem.c поставляемый
Просмотреть файл

@ -303,7 +303,7 @@ COLD int dav1d_mem_pool_init(const enum AllocationType type,
*ppool = pool;
return 0;
}
free(pool);
dav1d_free(pool);
}
*ppool = NULL;
return DAV1D_ERR(ENOMEM);

Просмотреть файл

@ -29,11 +29,7 @@
%if ARCH_X86_64
SECTION_RODATA 64
pb_0to63: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
db 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47
db 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
SECTION_RODATA 16
scale_mask: db -1, -1, 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1
scale_shift: dw 7, 7, 6, 6, 5, 5, 4, 4
pw_27_17_17_27: dw 108, 68, 68, 108, 27, 17, 17, 27
@ -53,6 +49,8 @@ uv_offset_mul: dd 256
dd 1024
pb_8_9_0_1: db 8, 9, 0, 1
cextern pb_0to63
SECTION .text
INIT_ZMM avx512icl
@ -382,7 +380,7 @@ cglobal fguv_32x32xn_i%1_16bpc, 6, 15, 22, dst, src, stride, fg_data, w, scaling
packssdw m4, m5, m5
vpbroadcastd m21, [base+scale_shift+r9*8+4]
%if %2
mova m12, [base+pb_0to63] ; pw_even
mova m12, [pb_0to63] ; pw_even
mov r13d, 0x0101
vpbroadcastq m10, [base+pw_23_22+r9*8]
kmovw k3, r13d

2
third_party/dav1d/src/x86/ipred.h поставляемый
Просмотреть файл

@ -137,6 +137,8 @@ static ALWAYS_INLINE void intra_pred_dsp_init_x86(Dav1dIntraPredDSPContext *cons
init_angular_ipred_fn(LEFT_DC_PRED, ipred_dc_left, avx512icl);
init_angular_ipred_fn(HOR_PRED, ipred_h, avx512icl);
init_angular_ipred_fn(VERT_PRED, ipred_v, avx512icl);
init_angular_ipred_fn(Z1_PRED, ipred_z1, avx512icl);
init_angular_ipred_fn(Z3_PRED, ipred_z3, avx512icl);
#endif
init_angular_ipred_fn(PAETH_PRED, ipred_paeth, avx512icl);
init_angular_ipred_fn(SMOOTH_PRED, ipred_smooth, avx512icl);

975
third_party/dav1d/src/x86/ipred_avx512.asm поставляемый

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -41,6 +41,10 @@ hmulC: dd 0, 1, 2, 3, 16, 17, 18, 19, 32, 33, 34, 35, 48, 49, 50, 51
hmulD: dd 0, 1, 16, 17, 32, 33, 48, 49
hshuf4:db 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
shift1: dq 0x0204081020408000
shift3: dq 0x0810204080000000
shift4: dq 0x1020408000000000
pb_1: times 4 db 1
pb_2: times 4 db 2
pb_3: times 4 db 3
@ -49,9 +53,6 @@ pb_16: times 4 db 16
pb_63: times 4 db 63
pb_64: times 4 db 64
pb_128: times 4 db 0x80
pb_240: times 4 db 0xf0
pb_248: times 4 db 0xf8
pb_254: times 4 db 0xfe
pb_2_1: times 2 db 2, 1
pb_3_1: times 2 db 3, 1
pb_7_1: times 2 db 7, 1
@ -482,8 +483,7 @@ SECTION .text
vpbroadcastb m1, [lutq+136]
pminub m2, m1
pmaxub m2, m15 ; I
pand m1, m0, [pb_240]{bcstd}
psrlq m1, 4 ; H
gf2p8affineqb m1, m0, [shift4]{bcstq}, 0 ; H
paddd m0, [pb_2]{bcstd}
paddb m0, m0
paddb m0, m2 ; E
@ -534,8 +534,7 @@ SECTION .text
ABSSUB m10, m3, m6, m11 ; abs(p1-q1)
ABSSUB m11, m4, m5, m2 ; abs(p0-q0)
paddusb m11, m11
pand m10, [pb_254]{bcstd}
psrlq m10, 1
gf2p8affineqb m10, m10, [shift1]{bcstq}, 0
paddusb m10, m11 ; abs(p0-q0)*2+(abs(p1-q1)>>1)
vpcmpub k3{k3}, m10, m0, 2 ; abs(p0-q0)*2+(abs(p1-q1)>>1) <= E
@ -608,12 +607,8 @@ SECTION .text
paddsb m10{k3}{z}, m10, m11 ; f=iclip_diff(3*(q0-p0)+f)&fm
paddsb m8, m10, m15
paddsb m10, m0
pand m8, [pb_248]{bcstd}
pand m10, [pb_248]{bcstd}
psrlq m8, 3
psrlq m10, 3
pxor m8, m12
pxor m10, m12
gf2p8affineqb m8, m8, [shift3]{bcstq}, 16
gf2p8affineqb m10, m10, [shift3]{bcstq}, 16
psubb m8, m12 ; f2
psubb m10, m12 ; f1
paddsb m4, m8

Просмотреть файл

@ -32,15 +32,15 @@ SECTION_RODATA 32
sgr_lshuf3: db 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
sgr_lshuf5: db 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
wiener_lshuf5: db 4, 5, 4, 5, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
wiener_lshuf7: db 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 10, 11, 12, 13, 14, 15
db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
wiener_shufA: db 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11
wiener_shufB: db 6, 7, 4, 5, 8, 9, 6, 7, 10, 11, 8, 9, 12, 13, 10, 11
wiener_shufC: db 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15
wiener_shufD: db 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1
wiener_shufE: db 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15
wiener_lshuf5: db 4, 5, 4, 5, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
wiener_lshuf7: db 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 10, 11, 12, 13, 14, 15
pb_0to31: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
wiener_hshift: dw 4, 4, 1, 1
wiener_vshift: dw 1024, 1024, 4096, 4096
@ -62,6 +62,7 @@ pd_0xf00801c7: dd 0xf00801c7
%define pw_256 sgr_lshuf5
cextern pb_0to63
cextern sgr_x_by_x_avx2
SECTION .text
@ -182,7 +183,7 @@ cglobal wiener_filter7_16bpc, 4, 15, 16, -384*12-16, dst, stride, left, lpf, \
.extend_right:
movd xm1, r10d
vpbroadcastd m0, [pb_6_7]
movu m2, [pb_0to31]
mova m2, [pb_0to63]
vpbroadcastb m1, xm1
psubb m0, m1
pminub m0, m2
@ -406,9 +407,8 @@ cglobal wiener_filter5_16bpc, 4, 13, 16, 384*8+16, dst, stride, left, lpf, \
vpbroadcastd m0, [base+wiener_hshift+t3*4]
vpbroadcastd m9, [base+wiener_round+t3*4]
vpbroadcastd m10, [base+wiener_vshift+t3*4]
movu xm15, [wiener_lshuf5]
mova m15, [wiener_lshuf5]
pmullw m11, m0
vinserti128 m15, [pb_0to31], 1
pmullw m12, m0
test edgeb, 4 ; LR_HAVE_TOP
jz .no_top
@ -486,7 +486,7 @@ cglobal wiener_filter5_16bpc, 4, 13, 16, 384*8+16, dst, stride, left, lpf, \
vpbroadcastb m2, xm2
psubb m0, m2
psubb m1, m2
movu m2, [pb_0to31]
mova m2, [pb_0to63]
pminub m0, m2
pminub m1, m2
pshufb m3, m0

Просмотреть файл

@ -31,11 +31,11 @@
SECTION_RODATA 32
wiener_l_shuf: db 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
pb_0to31: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
wiener_shufA: db 1, 7, 2, 8, 3, 9, 4, 10, 5, 11, 6, 12, 7, 13, 8, 14
wiener_shufB: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
wiener_shufC: db 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11, 13, 12
sgr_l_shuf: db 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
sgr_r_ext: times 16 db 1
times 16 db 9
@ -64,7 +64,6 @@ pb_m5: times 4 db -5
pb_3: times 4 db 3
pw_5_6: dw 5, 6
sgr_l_shuf: db 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
sgr_shuf: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1
db 9, -1, 10, -1, 11, -1, 12, -1
@ -77,6 +76,8 @@ pd_m4096: dd -4096
pd_0xf00801c7: dd 0xf00801c7
pd_0xf00800a4: dd 0xf00800a4
cextern pb_0to63
SECTION .text
DECLARE_REG_TMP 8, 7, 9, 11, 12, 13, 14 ; ring buffer pointers
@ -192,7 +193,7 @@ cglobal wiener_filter7_8bpc, 4, 15, 16, -384*12-16, dst, stride, left, lpf, \
vpbroadcastd m0, [pb_3]
vpbroadcastd m1, [pb_m5]
vpbroadcastb m2, xm2
movu m3, [pb_0to31]
mova m3, [pb_0to63]
psubb m0, m2
psubb m1, m2
pminub m0, m3
@ -826,7 +827,7 @@ cglobal sgr_filter_5x5_8bpc, 4, 13, 16, 400*24+16, dst, stride, left, lpf, \
mova m0, [sgr_r_ext]
vpbroadcastb m2, xm2
psubb m0, m2
pminub m0, [pb_0to31]
pminub m0, [pb_0to63]
pshufb m5, m0
ret
.h: ; horizontal boxsum

2
third_party/dav1d/src/x86/pal.asm поставляемый
Просмотреть файл

@ -28,7 +28,7 @@
SECTION_RODATA 64
pb_0to63: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
const pb_0to63, db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
%if ARCH_X86_64
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
db 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47