зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1860626 - Update dav1d to fd4ecc2fd870fa267e1995600dddf212c6e49300 r=chunmin
Differential Revision: https://phabricator.services.mozilla.com/D191676
This commit is contained in:
Родитель
41ede2b8ea
Коммит
abc43ce9c7
|
@ -20,11 +20,11 @@ origin:
|
|||
|
||||
# Human-readable identifier for this version/release
|
||||
# Generally "version NNN", "tag SSS", "bookmark SSS"
|
||||
release: e58afe4dd9057591882a01c31382c203e8a61c92 (2023-07-25T16:10:07.000+02:00).
|
||||
release: fd4ecc2fd870fa267e1995600dddf212c6e49300 (2023-10-19T17:00:20.000+02:00).
|
||||
|
||||
# Revision to pull in
|
||||
# Must be a long or short commit SHA (long preferred)
|
||||
revision: e58afe4dd9057591882a01c31382c203e8a61c92
|
||||
revision: fd4ecc2fd870fa267e1995600dddf212c6e49300
|
||||
|
||||
# The package's license, where possible using the mnemonic from
|
||||
# https://spdx.org/licenses/
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
/* auto-generated, do not edit */
|
||||
#define DAV1D_VERSION "e58afe4dd9057591882a01c31382c203e8a61c92"
|
||||
#define DAV1D_VERSION "fd4ecc2fd870fa267e1995600dddf212c6e49300"
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
Changes for 1.3.0 'Tundra Peregrine Falcon (Calidus)':
|
||||
------------------------------------------------------
|
||||
|
||||
1.3.0 is a medium release of dav1d, focused on new APIs and memory usage reduction.
|
||||
|
||||
- Reduce memory usage in numerous places
|
||||
- ABI break in Dav1dSequenceHeader, Dav1dFrameHeader, Dav1dContentLightLevel structures
|
||||
- new API function to check the API version: dav1d_version_api()
|
||||
- Rewrite of the SGR functions for ARM64 to be faster
|
||||
- NEON implementation of save_tmvs for ARM32 and ARM64
|
||||
- x86 palette DSP for pal_idx_finish function
|
||||
|
||||
|
||||
Changes for 1.2.1 'Arctic Peregrine Falcon':
|
||||
-------------------------------------------
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
project('dav1d', ['c'],
|
||||
version: '1.2.1',
|
||||
version: '1.3.0',
|
||||
default_options: ['c_std=c99',
|
||||
'warning_level=2',
|
||||
'buildtype=release',
|
||||
|
|
|
@ -3529,6 +3529,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
br x9
|
||||
|
||||
2:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.h}[0], [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.h}[0], [x13], x1
|
||||
|
@ -3547,6 +3548,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
b 1b
|
||||
|
||||
4:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.s}[0], [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.s}[0], [x13], x1
|
||||
|
@ -3565,6 +3567,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
b 1b
|
||||
|
||||
8:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.8b}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.8b}, [x13], x1
|
||||
|
@ -3585,6 +3588,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
16:
|
||||
32:
|
||||
64:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.16b}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.16b}, [x13], x1
|
||||
|
|
|
@ -3664,6 +3664,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
br x9
|
||||
|
||||
2:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.s}[0], [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.s}[0], [x13], x1
|
||||
|
@ -3682,6 +3683,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
b 1b
|
||||
|
||||
4:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.4h}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.4h}, [x13], x1
|
||||
|
@ -3703,6 +3705,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
16:
|
||||
32:
|
||||
64:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.8h}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.8h}, [x13], x1
|
||||
|
|
|
@ -202,27 +202,27 @@ function save_tmvs_neon, export=1
|
|||
ret
|
||||
|
||||
10:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
add x16, x3, #4
|
||||
st1 {v0.s}[0], [x3]
|
||||
st1 {v0.b}[4], [x16]
|
||||
add x3, x3, #5
|
||||
ret
|
||||
20:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
add x16, x3, #8
|
||||
st1 {v0.d}[0], [x3]
|
||||
st1 {v0.h}[4], [x16]
|
||||
add x3, x3, #2*5
|
||||
ret
|
||||
40:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
st1 {v0.16b}, [x3]
|
||||
str s1, [x3, #16]
|
||||
add x3, x3, #4*5
|
||||
ret
|
||||
80:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This writes 6 full entries plus 2 extra bytes
|
||||
st1 {v0.16b, v1.16b}, [x3]
|
||||
// Write the last few, overlapping with the first write.
|
||||
|
@ -230,7 +230,7 @@ function save_tmvs_neon, export=1
|
|||
add x3, x3, #8*5
|
||||
ret
|
||||
160:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
add x16, x3, #6*5
|
||||
add x17, x3, #12*5
|
||||
// This writes 6 full entries plus 2 extra bytes
|
||||
|
|
|
@ -303,7 +303,7 @@ COLD int dav1d_mem_pool_init(const enum AllocationType type,
|
|||
*ppool = pool;
|
||||
return 0;
|
||||
}
|
||||
free(pool);
|
||||
dav1d_free(pool);
|
||||
}
|
||||
*ppool = NULL;
|
||||
return DAV1D_ERR(ENOMEM);
|
||||
|
|
|
@ -29,11 +29,7 @@
|
|||
|
||||
%if ARCH_X86_64
|
||||
|
||||
SECTION_RODATA 64
|
||||
pb_0to63: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
|
||||
db 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47
|
||||
db 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
|
||||
SECTION_RODATA 16
|
||||
scale_mask: db -1, -1, 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1
|
||||
scale_shift: dw 7, 7, 6, 6, 5, 5, 4, 4
|
||||
pw_27_17_17_27: dw 108, 68, 68, 108, 27, 17, 17, 27
|
||||
|
@ -53,6 +49,8 @@ uv_offset_mul: dd 256
|
|||
dd 1024
|
||||
pb_8_9_0_1: db 8, 9, 0, 1
|
||||
|
||||
cextern pb_0to63
|
||||
|
||||
SECTION .text
|
||||
|
||||
INIT_ZMM avx512icl
|
||||
|
@ -382,7 +380,7 @@ cglobal fguv_32x32xn_i%1_16bpc, 6, 15, 22, dst, src, stride, fg_data, w, scaling
|
|||
packssdw m4, m5, m5
|
||||
vpbroadcastd m21, [base+scale_shift+r9*8+4]
|
||||
%if %2
|
||||
mova m12, [base+pb_0to63] ; pw_even
|
||||
mova m12, [pb_0to63] ; pw_even
|
||||
mov r13d, 0x0101
|
||||
vpbroadcastq m10, [base+pw_23_22+r9*8]
|
||||
kmovw k3, r13d
|
||||
|
|
|
@ -137,6 +137,8 @@ static ALWAYS_INLINE void intra_pred_dsp_init_x86(Dav1dIntraPredDSPContext *cons
|
|||
init_angular_ipred_fn(LEFT_DC_PRED, ipred_dc_left, avx512icl);
|
||||
init_angular_ipred_fn(HOR_PRED, ipred_h, avx512icl);
|
||||
init_angular_ipred_fn(VERT_PRED, ipred_v, avx512icl);
|
||||
init_angular_ipred_fn(Z1_PRED, ipred_z1, avx512icl);
|
||||
init_angular_ipred_fn(Z3_PRED, ipred_z3, avx512icl);
|
||||
#endif
|
||||
init_angular_ipred_fn(PAETH_PRED, ipred_paeth, avx512icl);
|
||||
init_angular_ipred_fn(SMOOTH_PRED, ipred_smooth, avx512icl);
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -41,6 +41,10 @@ hmulC: dd 0, 1, 2, 3, 16, 17, 18, 19, 32, 33, 34, 35, 48, 49, 50, 51
|
|||
hmulD: dd 0, 1, 16, 17, 32, 33, 48, 49
|
||||
hshuf4:db 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
|
||||
|
||||
shift1: dq 0x0204081020408000
|
||||
shift3: dq 0x0810204080000000
|
||||
shift4: dq 0x1020408000000000
|
||||
|
||||
pb_1: times 4 db 1
|
||||
pb_2: times 4 db 2
|
||||
pb_3: times 4 db 3
|
||||
|
@ -49,9 +53,6 @@ pb_16: times 4 db 16
|
|||
pb_63: times 4 db 63
|
||||
pb_64: times 4 db 64
|
||||
pb_128: times 4 db 0x80
|
||||
pb_240: times 4 db 0xf0
|
||||
pb_248: times 4 db 0xf8
|
||||
pb_254: times 4 db 0xfe
|
||||
pb_2_1: times 2 db 2, 1
|
||||
pb_3_1: times 2 db 3, 1
|
||||
pb_7_1: times 2 db 7, 1
|
||||
|
@ -482,8 +483,7 @@ SECTION .text
|
|||
vpbroadcastb m1, [lutq+136]
|
||||
pminub m2, m1
|
||||
pmaxub m2, m15 ; I
|
||||
pand m1, m0, [pb_240]{bcstd}
|
||||
psrlq m1, 4 ; H
|
||||
gf2p8affineqb m1, m0, [shift4]{bcstq}, 0 ; H
|
||||
paddd m0, [pb_2]{bcstd}
|
||||
paddb m0, m0
|
||||
paddb m0, m2 ; E
|
||||
|
@ -534,8 +534,7 @@ SECTION .text
|
|||
ABSSUB m10, m3, m6, m11 ; abs(p1-q1)
|
||||
ABSSUB m11, m4, m5, m2 ; abs(p0-q0)
|
||||
paddusb m11, m11
|
||||
pand m10, [pb_254]{bcstd}
|
||||
psrlq m10, 1
|
||||
gf2p8affineqb m10, m10, [shift1]{bcstq}, 0
|
||||
paddusb m10, m11 ; abs(p0-q0)*2+(abs(p1-q1)>>1)
|
||||
vpcmpub k3{k3}, m10, m0, 2 ; abs(p0-q0)*2+(abs(p1-q1)>>1) <= E
|
||||
|
||||
|
@ -608,12 +607,8 @@ SECTION .text
|
|||
paddsb m10{k3}{z}, m10, m11 ; f=iclip_diff(3*(q0-p0)+f)&fm
|
||||
paddsb m8, m10, m15
|
||||
paddsb m10, m0
|
||||
pand m8, [pb_248]{bcstd}
|
||||
pand m10, [pb_248]{bcstd}
|
||||
psrlq m8, 3
|
||||
psrlq m10, 3
|
||||
pxor m8, m12
|
||||
pxor m10, m12
|
||||
gf2p8affineqb m8, m8, [shift3]{bcstq}, 16
|
||||
gf2p8affineqb m10, m10, [shift3]{bcstq}, 16
|
||||
psubb m8, m12 ; f2
|
||||
psubb m10, m12 ; f1
|
||||
paddsb m4, m8
|
||||
|
|
|
@ -32,15 +32,15 @@ SECTION_RODATA 32
|
|||
|
||||
sgr_lshuf3: db 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
|
||||
sgr_lshuf5: db 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
|
||||
wiener_lshuf5: db 4, 5, 4, 5, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
wiener_lshuf7: db 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
wiener_shufA: db 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11
|
||||
wiener_shufB: db 6, 7, 4, 5, 8, 9, 6, 7, 10, 11, 8, 9, 12, 13, 10, 11
|
||||
wiener_shufC: db 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15
|
||||
wiener_shufD: db 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1
|
||||
wiener_shufE: db 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15
|
||||
wiener_lshuf5: db 4, 5, 4, 5, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
wiener_lshuf7: db 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
pb_0to31: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
|
||||
|
||||
wiener_hshift: dw 4, 4, 1, 1
|
||||
wiener_vshift: dw 1024, 1024, 4096, 4096
|
||||
|
@ -62,6 +62,7 @@ pd_0xf00801c7: dd 0xf00801c7
|
|||
|
||||
%define pw_256 sgr_lshuf5
|
||||
|
||||
cextern pb_0to63
|
||||
cextern sgr_x_by_x_avx2
|
||||
|
||||
SECTION .text
|
||||
|
@ -182,7 +183,7 @@ cglobal wiener_filter7_16bpc, 4, 15, 16, -384*12-16, dst, stride, left, lpf, \
|
|||
.extend_right:
|
||||
movd xm1, r10d
|
||||
vpbroadcastd m0, [pb_6_7]
|
||||
movu m2, [pb_0to31]
|
||||
mova m2, [pb_0to63]
|
||||
vpbroadcastb m1, xm1
|
||||
psubb m0, m1
|
||||
pminub m0, m2
|
||||
|
@ -406,9 +407,8 @@ cglobal wiener_filter5_16bpc, 4, 13, 16, 384*8+16, dst, stride, left, lpf, \
|
|||
vpbroadcastd m0, [base+wiener_hshift+t3*4]
|
||||
vpbroadcastd m9, [base+wiener_round+t3*4]
|
||||
vpbroadcastd m10, [base+wiener_vshift+t3*4]
|
||||
movu xm15, [wiener_lshuf5]
|
||||
mova m15, [wiener_lshuf5]
|
||||
pmullw m11, m0
|
||||
vinserti128 m15, [pb_0to31], 1
|
||||
pmullw m12, m0
|
||||
test edgeb, 4 ; LR_HAVE_TOP
|
||||
jz .no_top
|
||||
|
@ -486,7 +486,7 @@ cglobal wiener_filter5_16bpc, 4, 13, 16, 384*8+16, dst, stride, left, lpf, \
|
|||
vpbroadcastb m2, xm2
|
||||
psubb m0, m2
|
||||
psubb m1, m2
|
||||
movu m2, [pb_0to31]
|
||||
mova m2, [pb_0to63]
|
||||
pminub m0, m2
|
||||
pminub m1, m2
|
||||
pshufb m3, m0
|
||||
|
|
|
@ -31,11 +31,11 @@
|
|||
SECTION_RODATA 32
|
||||
|
||||
wiener_l_shuf: db 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
pb_0to31: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
|
||||
db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
wiener_shufA: db 1, 7, 2, 8, 3, 9, 4, 10, 5, 11, 6, 12, 7, 13, 8, 14
|
||||
wiener_shufB: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
|
||||
wiener_shufC: db 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11, 13, 12
|
||||
sgr_l_shuf: db 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
|
||||
sgr_r_ext: times 16 db 1
|
||||
times 16 db 9
|
||||
|
||||
|
@ -64,7 +64,6 @@ pb_m5: times 4 db -5
|
|||
pb_3: times 4 db 3
|
||||
pw_5_6: dw 5, 6
|
||||
|
||||
sgr_l_shuf: db 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
|
||||
sgr_shuf: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1
|
||||
db 9, -1, 10, -1, 11, -1, 12, -1
|
||||
|
||||
|
@ -77,6 +76,8 @@ pd_m4096: dd -4096
|
|||
pd_0xf00801c7: dd 0xf00801c7
|
||||
pd_0xf00800a4: dd 0xf00800a4
|
||||
|
||||
cextern pb_0to63
|
||||
|
||||
SECTION .text
|
||||
|
||||
DECLARE_REG_TMP 8, 7, 9, 11, 12, 13, 14 ; ring buffer pointers
|
||||
|
@ -192,7 +193,7 @@ cglobal wiener_filter7_8bpc, 4, 15, 16, -384*12-16, dst, stride, left, lpf, \
|
|||
vpbroadcastd m0, [pb_3]
|
||||
vpbroadcastd m1, [pb_m5]
|
||||
vpbroadcastb m2, xm2
|
||||
movu m3, [pb_0to31]
|
||||
mova m3, [pb_0to63]
|
||||
psubb m0, m2
|
||||
psubb m1, m2
|
||||
pminub m0, m3
|
||||
|
@ -826,7 +827,7 @@ cglobal sgr_filter_5x5_8bpc, 4, 13, 16, 400*24+16, dst, stride, left, lpf, \
|
|||
mova m0, [sgr_r_ext]
|
||||
vpbroadcastb m2, xm2
|
||||
psubb m0, m2
|
||||
pminub m0, [pb_0to31]
|
||||
pminub m0, [pb_0to63]
|
||||
pshufb m5, m0
|
||||
ret
|
||||
.h: ; horizontal boxsum
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
|
||||
SECTION_RODATA 64
|
||||
|
||||
pb_0to63: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
const pb_0to63, db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
%if ARCH_X86_64
|
||||
db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
|
||||
db 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47
|
||||
|
|
Загрузка…
Ссылка в новой задаче