Merge remote branch 'internal/upstream' into HEAD
This commit is contained in:
Коммит
39e36f8604
|
@ -980,6 +980,9 @@ EOF
|
|||
esac
|
||||
fi
|
||||
|
||||
# for sysconf(3) and friends.
|
||||
check_header unistd.h
|
||||
|
||||
# glibc needs these
|
||||
if enabled linux; then
|
||||
add_cflags -D_LARGEFILE_SOURCE
|
||||
|
|
|
@ -211,6 +211,7 @@ HAVE_LIST="
|
|||
alt_tree_layout
|
||||
pthread_h
|
||||
sys_mman_h
|
||||
unistd_h
|
||||
"
|
||||
EXPERIMENT_LIST="
|
||||
extend_qrange
|
||||
|
|
|
@ -308,7 +308,6 @@
|
|||
; q9 q2
|
||||
; q10 q3
|
||||
|vp8_loop_filter_neon| PROC
|
||||
ldr r12, _lf_coeff_
|
||||
|
||||
; vp8_filter_mask
|
||||
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
|
||||
|
@ -339,7 +338,7 @@
|
|||
vqadd.u8 q9, q9, q2 ; a = b + a
|
||||
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
|
||||
|
||||
vld1.u8 {q0}, [r12]!
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
|
||||
; vp8_filter() function
|
||||
; convert to signed
|
||||
|
@ -348,7 +347,7 @@
|
|||
veor q5, q5, q0 ; ps1
|
||||
veor q8, q8, q0 ; qs1
|
||||
|
||||
vld1.u8 {q10}, [r12]!
|
||||
vmov.u8 q10, #3 ; #3
|
||||
|
||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||
vsubl.s8 q11, d15, d13
|
||||
|
@ -367,7 +366,7 @@
|
|||
vaddw.s8 q2, q2, d2
|
||||
vaddw.s8 q11, q11, d3
|
||||
|
||||
vld1.u8 {q9}, [r12]!
|
||||
vmov.u8 q9, #4 ; #4
|
||||
|
||||
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d2, q2
|
||||
|
@ -399,12 +398,4 @@
|
|||
|
||||
;-----------------
|
||||
|
||||
_lf_coeff_
|
||||
DCD lf_coeff
|
||||
lf_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
|
||||
|
||||
END
|
||||
|
|
|
@ -22,20 +22,19 @@
|
|||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh,
|
||||
; stack(r4) const signed char *thresh (unused)
|
||||
; //stack(r5) int count --unused
|
||||
|
||||
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
|
||||
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
||||
|
||||
ldr r12, _lfhy_coeff_
|
||||
vld1.u8 {q5}, [r0], r1 ; p1
|
||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld1.u8 {q6}, [r0], r1 ; p0
|
||||
vld1.u8 {q0}, [r12]! ; 0x80
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vld1.u8 {q7}, [r0], r1 ; q0
|
||||
vld1.u8 {q10}, [r12]! ; 0x03
|
||||
vmov.u8 q10, #0x03 ; 0x03
|
||||
vld1.u8 {q8}, [r0] ; q1
|
||||
|
||||
;vp8_filter_mask() function
|
||||
|
@ -66,7 +65,7 @@
|
|||
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q12, q3, q3
|
||||
|
||||
vld1.u8 {q9}, [r12]! ; 0x04
|
||||
vmov.u8 q9, #0x04 ; 0x04
|
||||
|
||||
vadd.s16 q2, q2, q11
|
||||
vadd.s16 q3, q3, q12
|
||||
|
@ -105,11 +104,4 @@
|
|||
|
||||
;-----------------
|
||||
|
||||
_lfhy_coeff_
|
||||
DCD lfhy_coeff
|
||||
lfhy_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
|
||||
END
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh,
|
||||
; stack(r4) const signed char *thresh (unused)
|
||||
; //stack(r5) int count --unused
|
||||
|
||||
|vp8_loop_filter_simple_vertical_edge_neon| PROC
|
||||
|
@ -32,7 +32,6 @@
|
|||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
|
||||
ldr r12, _vlfy_coeff_
|
||||
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
|
||||
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
|
||||
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
|
||||
|
@ -41,11 +40,11 @@
|
|||
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
|
||||
|
||||
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vld1.u8 {q0}, [r12]! ; 0x80
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vld1.u8 {q11}, [r12]! ; 0x03
|
||||
vmov.u8 q11, #0x03 ; 0x03
|
||||
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vld1.u8 {q12}, [r12]! ; 0x04
|
||||
vmov.u8 q12, #0x04 ; 0x04
|
||||
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
|
@ -146,11 +145,4 @@
|
|||
|
||||
;-----------------
|
||||
|
||||
_vlfy_coeff_
|
||||
DCD vlfy_coeff
|
||||
vlfy_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
|
||||
END
|
||||
|
|
|
@ -372,7 +372,6 @@
|
|||
; q10 q3
|
||||
|
||||
|vp8_mbloop_filter_neon| PROC
|
||||
ldr r12, _mblf_coeff_
|
||||
|
||||
; vp8_filter_mask
|
||||
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
|
||||
|
@ -396,7 +395,7 @@
|
|||
|
||||
vld1.s8 {d4[], d5[]}, [r2] ; flimit
|
||||
|
||||
vld1.u8 {q0}, [r12]!
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
|
||||
vadd.u8 q2, q2, q2 ; flimit * 2
|
||||
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
|
||||
|
@ -431,12 +430,12 @@
|
|||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q13, q13, q11
|
||||
|
||||
vld1.u8 {q12}, [r12]! ; #3
|
||||
vmov.u8 q12, #3 ; #3
|
||||
|
||||
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d3
|
||||
|
||||
vld1.u8 {q11}, [r12]! ; #4
|
||||
vmov.u8 q11, #4 ; #4
|
||||
|
||||
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d2, q2
|
||||
|
@ -444,16 +443,16 @@
|
|||
|
||||
vand q1, q1, q15 ; vp8_filter &= mask
|
||||
|
||||
vld1.u8 {q15}, [r12]! ; #63
|
||||
;
|
||||
vmov.u16 q15, #63 ; #63
|
||||
|
||||
vand q13, q1, q14 ; Filter2 &= hev
|
||||
|
||||
vld1.u8 {d7}, [r12]! ; #9
|
||||
vmov.u8 d7, #9 ; #9
|
||||
|
||||
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
|
||||
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
|
||||
|
||||
vld1.u8 {d6}, [r12]! ; #18
|
||||
vmov.u8 d6, #18 ; #18
|
||||
|
||||
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
|
||||
|
@ -463,7 +462,7 @@
|
|||
|
||||
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
|
||||
|
||||
vld1.u8 {d5}, [r12]! ; #27
|
||||
vmov.u8 d5, #27 ; #27
|
||||
|
||||
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
|
||||
|
||||
|
@ -507,14 +506,4 @@
|
|||
|
||||
;-----------------
|
||||
|
||||
_mblf_coeff_
|
||||
DCD mblf_coeff
|
||||
mblf_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
|
||||
DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
|
||||
DCD 0x1b1b1b1b, 0x1b1b1b1b
|
||||
|
||||
END
|
||||
|
|
|
@ -17,9 +17,54 @@
|
|||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#if HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
|
||||
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/*
 * Returns the number of processors that are currently online, used to cap
 * the number of worker threads.
 *
 * - With <unistd.h> available, the count is queried through sysconf().
 * - On Windows (without <unistd.h>), it is read from the SYSTEM_INFO filled
 *   in by GetNativeSystemInfo(), falling back to GetSystemInfo() when the
 *   former cannot be resolved from kernel32.dll.
 * - When no query mechanism is compiled in, the default of 16 is returned.
 *
 * The result is always at least 1: a failed or non-positive query is
 * reported as a single core.
 */
static int get_cpu_count(void)
{
    int core_count = 16;   /* default when the platform offers no query */

#if HAVE_UNISTD_H
    /* sysconf() returns a long (-1 on error); narrow it explicitly. */
#if defined(_SC_NPROCESSORS_ONLN)
    core_count = (int)sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
    core_count = (int)sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
    {
        PGNSI pGNSI;
        SYSTEM_INFO sysinfo;

        /* Call GetNativeSystemInfo if supported or
         * GetSystemInfo otherwise. */

        pGNSI = (PGNSI) GetProcAddress(
                    GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");

        if (pGNSI != NULL)
            pGNSI(&sysinfo);
        else
            GetSystemInfo(&sysinfo);

        /* dwNumberOfProcessors is an unsigned DWORD; narrow it explicitly. */
        core_count = (int)sysinfo.dwNumberOfProcessors;
    }
#else
    /* other platforms */
#endif

    /* Never report fewer than one core, even if the query failed. */
    return core_count > 0 ? core_count : 1;
}
|
||||
#endif
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -98,4 +143,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
#endif
|
||||
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
ctx->processor_core_count = get_cpu_count();
|
||||
#endif /* CONFIG_MULTITHREAD */
|
||||
}
|
||||
|
|
|
@ -195,6 +195,9 @@ typedef struct VP8Common
|
|||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
VP8_COMMON_RTCD rtcd;
|
||||
#endif
|
||||
#if CONFIG_MULTITHREAD
|
||||
int processor_core_count;
|
||||
#endif
|
||||
struct postproc_state postproc_state;
|
||||
} VP8_COMMON;
|
||||
|
|
|
@ -439,12 +439,18 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
|
|||
|
||||
pbi->b_multithreaded_rd = 0;
|
||||
pbi->allocated_decoding_thread_count = 0;
|
||||
core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
|
||||
|
||||
/* limit decoding threads to the max number of token partitions */
|
||||
core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
|
||||
|
||||
/* limit decoding threads to the available cores */
|
||||
if (core_count > pbi->common.processor_core_count)
|
||||
core_count = pbi->common.processor_core_count;
|
||||
|
||||
if (core_count > 1)
|
||||
{
|
||||
pbi->b_multithreaded_rd = 1;
|
||||
pbi->decoding_thread_count = core_count -1;
|
||||
pbi->decoding_thread_count = core_count - 1;
|
||||
|
||||
CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
|
||||
CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
|
||||
|
|
|
@ -459,15 +459,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
|||
|
||||
cpi->b_multi_threaded = 0;
|
||||
cpi->encoding_thread_count = 0;
|
||||
cpi->processor_core_count = 32; //vp8_get_proc_core_count();
|
||||
|
||||
if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
|
||||
if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
|
||||
{
|
||||
int ithread;
|
||||
int th_count = cpi->oxcf.multi_threaded - 1;
|
||||
|
||||
if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
|
||||
th_count = cpi->processor_core_count - 1;
|
||||
/* don't allocate more threads than cores available */
|
||||
if (cpi->oxcf.multi_threaded > cm->processor_core_count)
|
||||
th_count = cm->processor_core_count - 1;
|
||||
|
||||
/* we have th_count + 1 (main) threads processing one row each */
|
||||
/* no point to have more threads than the sync range allows */
|
||||
|
@ -514,6 +514,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
|||
LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;
|
||||
|
||||
sem_init(&cpi->h_event_start_lpf, 0, 0);
|
||||
sem_init(&cpi->h_event_end_picklpf, 0, 0);
|
||||
sem_init(&cpi->h_event_end_lpf, 0, 0);
|
||||
|
||||
lpfthd->ptr1 = (void *)cpi;
|
||||
|
@ -547,6 +548,7 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
|
|||
|
||||
sem_destroy(&cpi->h_event_end_encoding);
|
||||
sem_destroy(&cpi->h_event_end_lpf);
|
||||
sem_destroy(&cpi->h_event_end_picklpf);
|
||||
sem_destroy(&cpi->h_event_start_lpf);
|
||||
|
||||
//free thread related resources
|
||||
|
|
|
@ -3211,7 +3211,7 @@ void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
|||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (cpi->b_multi_threaded)
|
||||
sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
|
||||
sem_post(&cpi->h_event_end_picklpf); /* signal that we have set filter_level */
|
||||
#endif
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
|
@ -4221,7 +4221,7 @@ static void encode_frame_to_data_rate
|
|||
#if CONFIG_MULTITHREAD
|
||||
/* wait until filter_level has been picked so that we can continue with stream packing */
|
||||
if (cpi->b_multi_threaded)
|
||||
sem_wait(&cpi->h_event_end_lpf);
|
||||
sem_wait(&cpi->h_event_end_picklpf);
|
||||
#endif
|
||||
|
||||
// build the bitstream
|
||||
|
|
|
@ -580,7 +580,6 @@ typedef struct
|
|||
// multithread data
|
||||
int * mt_current_mb_col;
|
||||
int mt_sync_range;
|
||||
int processor_core_count;
|
||||
int b_multi_threaded;
|
||||
int encoding_thread_count;
|
||||
|
||||
|
@ -595,6 +594,7 @@ typedef struct
|
|||
sem_t *h_event_start_encoding;
|
||||
sem_t h_event_end_encoding;
|
||||
sem_t h_event_start_lpf;
|
||||
sem_t h_event_end_picklpf;
|
||||
sem_t h_event_end_lpf;
|
||||
#endif
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче