proposed-aom : remove vp8, vp9 from directory configure and vpx
Change-Id: Ibb9192913f235ace2f6b8ef3c0eeabc3fd4cab11
This commit is contained in:
Родитель
cb785ade5b
Коммит
f0d007abdc
|
@ -38,8 +38,6 @@ Advanced options:
|
|||
${toggle_better_hw_compatibility}
|
||||
enable encoder to produce streams with better
|
||||
hardware decoder compatibility
|
||||
${toggle_vp8} VP8 codec support
|
||||
${toggle_vp9} VP9 codec support
|
||||
${toggle_vp10} VP10 codec support
|
||||
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
|
||||
${toggle_postproc} postprocessing
|
||||
|
@ -195,8 +193,6 @@ if [ ${doxy_major:-0} -ge 1 ]; then
|
|||
fi
|
||||
|
||||
# disable codecs when their source directory does not exist
|
||||
[ -d "${source_path}/vp8" ] || disable_feature vp8
|
||||
[ -d "${source_path}/vp9" ] || disable_feature vp9
|
||||
[ -d "${source_path}/vp10" ] || disable_feature vp10
|
||||
|
||||
# disable vp10 codec by default
|
||||
|
@ -218,16 +214,10 @@ enable_feature os_support
|
|||
enable_feature temporal_denoising
|
||||
|
||||
CODECS="
|
||||
vp8_encoder
|
||||
vp8_decoder
|
||||
vp9_encoder
|
||||
vp9_decoder
|
||||
vp10_encoder
|
||||
vp10_decoder
|
||||
"
|
||||
CODEC_FAMILIES="
|
||||
vp8
|
||||
vp9
|
||||
vp10
|
||||
"
|
||||
|
||||
|
|
|
@ -1,190 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "alloccommon.h"
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "findnearmv.h"
|
||||
#include "entropymode.h"
|
||||
#include "systemdependent.h"
|
||||
|
||||
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
|
||||
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
|
||||
#if CONFIG_POSTPROC
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
|
||||
if (oci->post_proc_buffer_int_used)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
|
||||
|
||||
vpx_free(oci->pp_limits_buffer);
|
||||
oci->pp_limits_buffer = NULL;
|
||||
#endif
|
||||
|
||||
vpx_free(oci->above_context);
|
||||
vpx_free(oci->mip);
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
vpx_free(oci->prev_mip);
|
||||
oci->prev_mip = NULL;
|
||||
#endif
|
||||
|
||||
oci->above_context = NULL;
|
||||
oci->mip = NULL;
|
||||
}
|
||||
|
||||
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
{
|
||||
int i;
|
||||
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
|
||||
/* our internal buffers are always multiples of 16 */
|
||||
if ((width & 0xf) != 0)
|
||||
width += 16 - (width & 0xf);
|
||||
|
||||
if ((height & 0xf) != 0)
|
||||
height += 16 - (height & 0xf);
|
||||
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
{
|
||||
oci->fb_idx_ref_cnt[i] = 0;
|
||||
oci->yv12_fb[i].flags = 0;
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
}
|
||||
|
||||
oci->new_fb_idx = 0;
|
||||
oci->lst_fb_idx = 1;
|
||||
oci->gld_fb_idx = 2;
|
||||
oci->alt_fb_idx = 3;
|
||||
|
||||
oci->fb_idx_ref_cnt[0] = 1;
|
||||
oci->fb_idx_ref_cnt[1] = 1;
|
||||
oci->fb_idx_ref_cnt[2] = 1;
|
||||
oci->fb_idx_ref_cnt[3] = 1;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->mb_rows = height >> 4;
|
||||
oci->mb_cols = width >> 4;
|
||||
oci->MBs = oci->mb_rows * oci->mb_cols;
|
||||
oci->mode_info_stride = oci->mb_cols + 1;
|
||||
oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
|
||||
|
||||
if (!oci->mip)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
|
||||
/* Allocation of previous mode info will be done in vp8_decode_frame()
|
||||
* as it is a decoder only data */
|
||||
|
||||
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
|
||||
|
||||
if (!oci->above_context)
|
||||
goto allocation_fail;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->post_proc_buffer_int_used = 0;
|
||||
memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
|
||||
memset(oci->post_proc_buffer.buffer_alloc, 128,
|
||||
oci->post_proc_buffer.frame_size);
|
||||
|
||||
/* Allocate buffer to store post-processing filter coefficients.
|
||||
*
|
||||
* Note: Round up mb_cols to support SIMD reads
|
||||
*/
|
||||
oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
|
||||
if (!oci->pp_limits_buffer)
|
||||
goto allocation_fail;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
allocation_fail:
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void vp8_setup_version(VP8_COMMON *cm)
|
||||
{
|
||||
switch (cm->version)
|
||||
{
|
||||
case 0:
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 1:
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 2:
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 3:
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 1;
|
||||
break;
|
||||
default:
|
||||
/*4,5,6,7 are reserved for future use*/
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
void vp8_create_common(VP8_COMMON *oci)
|
||||
{
|
||||
vp8_machine_specific_config(oci);
|
||||
|
||||
vp8_init_mbmode_probs(oci);
|
||||
vp8_default_bmode_probs(oci->fc.bmode_prob);
|
||||
|
||||
oci->mb_no_coeff_skip = 1;
|
||||
oci->no_lpf = 0;
|
||||
oci->filter_type = NORMAL_LOOPFILTER;
|
||||
oci->use_bilinear_mc_filter = 0;
|
||||
oci->full_pixel = 0;
|
||||
oci->multi_token_partition = ONE_PARTITION;
|
||||
oci->clamp_type = RECON_CLAMP_REQUIRED;
|
||||
|
||||
/* Initialize reference frame sign bias structure to defaults */
|
||||
memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
|
||||
/* Default disable buffer to buffer copying */
|
||||
oci->copy_buffer_to_gf = 0;
|
||||
oci->copy_buffer_to_arf = 0;
|
||||
}
|
||||
|
||||
void vp8_remove_common(VP8_COMMON *oci)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ALLOCCOMMON_H_
|
||||
#define VP8_COMMON_ALLOCCOMMON_H_
|
||||
|
||||
#include "onyxc_int.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_create_common(VP8_COMMON *oci);
|
||||
void vp8_remove_common(VP8_COMMON *oci);
|
||||
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
|
||||
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
|
||||
void vp8_setup_version(VP8_COMMON *oci);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ALLOCCOMMON_H_
|
|
@ -1,237 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_bil_second_pass_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *dst_ptr,
|
||||
; r2 unsigned int src_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; The output is transposed stroed in output array to make it easy for second pass filtering.
|
||||
|vp8_filter_block2d_bil_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
mov r12, r3 ; outer-loop counter
|
||||
|
||||
add r7, r2, r4 ; preload next row
|
||||
pld [r0, r7]
|
||||
|
||||
sub r2, r2, r4 ; src increment for height loop
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
|
||||
mov r3, r3, lsl #1 ; height*2
|
||||
add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1)
|
||||
|
||||
mov r11, r1 ; save dst_ptr for each row
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_1st_filter
|
||||
|
||||
|bil_height_loop_1st_v6|
|
||||
ldrb r6, [r0] ; load source data
|
||||
ldrb r7, [r0, #1]
|
||||
ldrb r8, [r0, #2]
|
||||
mov lr, r4, lsr #2 ; 4-in-parellel loop counter
|
||||
|
||||
|bil_width_loop_1st_v6|
|
||||
ldrb r9, [r0, #3]
|
||||
ldrb r10, [r0, #4]
|
||||
|
||||
pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0]
|
||||
pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
|
||||
|
||||
smuad r6, r6, r5 ; apply the filter
|
||||
pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
|
||||
smuad r7, r7, r5
|
||||
pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
|
||||
|
||||
smuad r8, r8, r5
|
||||
smuad r9, r9, r5
|
||||
|
||||
add r0, r0, #4
|
||||
subs lr, lr, #1
|
||||
|
||||
add r6, r6, #0x40 ; round_shift_and_clamp
|
||||
add r7, r7, #0x40
|
||||
usat r6, #16, r6, asr #7
|
||||
usat r7, #16, r7, asr #7
|
||||
|
||||
strh r6, [r1], r3 ; result is transposed and stored
|
||||
|
||||
add r8, r8, #0x40 ; round_shift_and_clamp
|
||||
strh r7, [r1], r3
|
||||
add r9, r9, #0x40
|
||||
usat r8, #16, r8, asr #7
|
||||
usat r9, #16, r9, asr #7
|
||||
|
||||
strh r8, [r1], r3 ; result is transposed and stored
|
||||
|
||||
ldrneb r6, [r0] ; load source data
|
||||
strh r9, [r1], r3
|
||||
|
||||
ldrneb r7, [r0, #1]
|
||||
ldrneb r8, [r0, #2]
|
||||
|
||||
bne bil_width_loop_1st_v6
|
||||
|
||||
add r0, r0, r2 ; move to next input row
|
||||
subs r12, r12, #1
|
||||
|
||||
add r9, r2, r4, lsl #1 ; adding back block width
|
||||
pld [r0, r9] ; preload next row
|
||||
|
||||
add r11, r11, #2 ; move over to next column
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_1st_v6
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
|bil_null_1st_filter|
|
||||
|bil_height_loop_null_1st|
|
||||
mov lr, r4, lsr #2 ; loop counter
|
||||
|
||||
|bil_width_loop_null_1st|
|
||||
ldrb r6, [r0] ; load data
|
||||
ldrb r7, [r0, #1]
|
||||
ldrb r8, [r0, #2]
|
||||
ldrb r9, [r0, #3]
|
||||
|
||||
strh r6, [r1], r3 ; store it to immediate buffer
|
||||
add r0, r0, #4
|
||||
strh r7, [r1], r3
|
||||
subs lr, lr, #1
|
||||
strh r8, [r1], r3
|
||||
strh r9, [r1], r3
|
||||
|
||||
bne bil_width_loop_null_1st
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r2 ; move to next input line
|
||||
add r11, r11, #2 ; move over to next column
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_null_1st
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP ; |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
|
||||
|
||||
;---------------------------------
|
||||
; r0 unsigned short *src_ptr,
|
||||
; r1 unsigned char *dst_ptr,
|
||||
; r2 int dst_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_bil_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
|
||||
mov r11, r1
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_2nd_filter
|
||||
|
||||
|bil_height_loop_2nd|
|
||||
ldr r6, [r0] ; load the data
|
||||
ldr r8, [r0, #4]
|
||||
ldrh r10, [r0, #8]
|
||||
mov lr, r3, lsr #2 ; loop counter
|
||||
|
||||
|bil_width_loop_2nd|
|
||||
pkhtb r7, r6, r8 ; src[1] | src[2]
|
||||
pkhtb r9, r8, r10 ; src[3] | src[4]
|
||||
|
||||
smuad r6, r6, r5 ; apply filter
|
||||
smuad r8, r8, r5 ; apply filter
|
||||
|
||||
subs lr, lr, #1
|
||||
|
||||
smuadx r7, r7, r5 ; apply filter
|
||||
smuadx r9, r9, r5 ; apply filter
|
||||
|
||||
add r0, r0, #8
|
||||
|
||||
add r6, r6, #0x40 ; round_shift_and_clamp
|
||||
add r7, r7, #0x40
|
||||
usat r6, #8, r6, asr #7
|
||||
usat r7, #8, r7, asr #7
|
||||
strb r6, [r1], r2 ; the result is transposed back and stored
|
||||
|
||||
add r8, r8, #0x40 ; round_shift_and_clamp
|
||||
strb r7, [r1], r2
|
||||
add r9, r9, #0x40
|
||||
usat r8, #8, r8, asr #7
|
||||
usat r9, #8, r9, asr #7
|
||||
strb r8, [r1], r2 ; the result is transposed back and stored
|
||||
|
||||
ldrne r6, [r0] ; load data
|
||||
strb r9, [r1], r2
|
||||
ldrne r8, [r0, #4]
|
||||
ldrneh r10, [r0, #8]
|
||||
|
||||
bne bil_width_loop_2nd
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, #4 ; update src for next row
|
||||
add r11, r11, #1
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_2nd
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
|bil_null_2nd_filter|
|
||||
|bil_height_loop_null_2nd|
|
||||
mov lr, r3, lsr #2
|
||||
|
||||
|bil_width_loop_null_2nd|
|
||||
ldr r6, [r0], #4 ; load data
|
||||
subs lr, lr, #1
|
||||
ldr r8, [r0], #4
|
||||
|
||||
strb r6, [r1], r2 ; store data
|
||||
mov r7, r6, lsr #16
|
||||
strb r7, [r1], r2
|
||||
mov r9, r8, lsr #16
|
||||
strb r8, [r1], r2
|
||||
strb r9, [r1], r2
|
||||
|
||||
bne bil_width_loop_null_2nd
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, #4
|
||||
add r11, r11, #1
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_null_2nd
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_second_pass_armv6|
|
||||
|
||||
END
|
|
@ -1,186 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem16x16_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem16x16_v6| PROC
|
||||
stmdb sp!, {r4 - r7}
|
||||
;push {r4-r7}
|
||||
|
||||
;preload
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
ands r4, r0, #15
|
||||
beq copy_mem16x16_fast
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem16x16_8
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem16x16_4
|
||||
|
||||
;copy one byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
ldrb r6, [r0, #2]
|
||||
ldrb r7, [r0, #3]
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
strb r6, [r2, #2]
|
||||
strb r7, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
ldrb r6, [r0, #6]
|
||||
ldrb r7, [r0, #7]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
strb r6, [r2, #6]
|
||||
strb r7, [r2, #7]
|
||||
|
||||
ldrb r4, [r0, #8]
|
||||
ldrb r5, [r0, #9]
|
||||
ldrb r6, [r0, #10]
|
||||
ldrb r7, [r0, #11]
|
||||
|
||||
strb r4, [r2, #8]
|
||||
strb r5, [r2, #9]
|
||||
strb r6, [r2, #10]
|
||||
strb r7, [r2, #11]
|
||||
|
||||
ldrb r4, [r0, #12]
|
||||
ldrb r5, [r0, #13]
|
||||
ldrb r6, [r0, #14]
|
||||
ldrb r7, [r0, #15]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #12]
|
||||
strb r5, [r2, #13]
|
||||
strb r6, [r2, #14]
|
||||
strb r7, [r2, #15]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
ldrneb r6, [r0, #2]
|
||||
ldrneb r7, [r0, #3]
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
bne copy_mem16x16_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem16x16_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
ldr r6, [r0, #8]
|
||||
ldr r7, [r0, #12]
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
str r6, [r2, #8]
|
||||
str r7, [r2, #12]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
ldrne r6, [r0, #8]
|
||||
ldrne r7, [r0, #12]
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
bne copy_mem16x16_4_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem16x16_8
|
||||
sub r1, r1, #16
|
||||
sub r3, r3, #16
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_8_loop
|
||||
ldmia r0!, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
ldmia r0!, {r6-r7}
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
stmia r2!, {r4-r5}
|
||||
subs r12, r12, #1
|
||||
;stm r2, {r4-r5}
|
||||
stmia r2!, {r6-r7}
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
bne copy_mem16x16_8_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 16 bytes each time
|
||||
copy_mem16x16_fast
|
||||
;sub r1, r1, #16
|
||||
;sub r3, r3, #16
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_fast_loop
|
||||
ldmia r0, {r4-r7}
|
||||
;ldm r0, {r4-r7}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r7}
|
||||
;stm r2, {r4-r7}
|
||||
add r2, r2, r3
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
bne copy_mem16x16_fast_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem16x16_v6|
|
||||
|
||||
END
|
|
@ -1,128 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x4_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x4_v6| PROC
|
||||
;push {r4-r5}
|
||||
stmdb sp!, {r4-r5}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem8x4_fast
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem8x4_4
|
||||
|
||||
;copy 1 byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
|
||||
ldrb r4, [r0, #2]
|
||||
ldrb r5, [r0, #3]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #2]
|
||||
strb r5, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
|
||||
ldrb r4, [r0, #6]
|
||||
ldrb r5, [r0, #7]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #6]
|
||||
strb r5, [r2, #7]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
|
||||
bne copy_mem8x4_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem8x4_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
|
||||
bne copy_mem8x4_4_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem8x4_fast
|
||||
;sub r1, r1, #8
|
||||
;sub r3, r3, #8
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_fast_loop
|
||||
ldmia r0, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r5}
|
||||
;stm r2, {r4-r5}
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem8x4_fast_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x4_v6|
|
||||
|
||||
END
|
|
@ -1,128 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x8_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void copy_mem8x8_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x8_v6| PROC
|
||||
;push {r4-r5}
|
||||
stmdb sp!, {r4-r5}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem8x8_fast
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem8x8_4
|
||||
|
||||
;copy 1 byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
|
||||
ldrb r4, [r0, #2]
|
||||
ldrb r5, [r0, #3]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #2]
|
||||
strb r5, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
|
||||
ldrb r4, [r0, #6]
|
||||
ldrb r5, [r0, #7]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #6]
|
||||
strb r5, [r2, #7]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
|
||||
bne copy_mem8x8_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem8x8_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
|
||||
bne copy_mem8x8_4_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem8x8_fast
|
||||
;sub r1, r1, #8
|
||||
;sub r3, r3, #8
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_fast_loop
|
||||
ldmia r0, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r5}
|
||||
;stm r2, {r4-r5}
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem8x8_fast_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x8_v6|
|
||||
|
||||
END
|
|
@ -1,70 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_dc_only_idct_add_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
|
||||
; int pred_stride, unsigned char *dst_ptr,
|
||||
; int dst_stride)
|
||||
; r0 input_dc
|
||||
; r1 pred_ptr
|
||||
; r2 pred_stride
|
||||
; r3 dst_ptr
|
||||
; sp dst_stride
|
||||
|
||||
|vp8_dc_only_idct_add_v6| PROC
|
||||
stmdb sp!, {r4 - r7}
|
||||
|
||||
add r0, r0, #4 ; input_dc += 4
|
||||
ldr r12, c0x0000FFFF
|
||||
ldr r4, [r1], r2
|
||||
and r0, r12, r0, asr #3 ; input_dc >> 3 + mask
|
||||
ldr r6, [r1], r2
|
||||
orr r0, r0, r0, lsl #16 ; a1 | a1
|
||||
|
||||
ldr r12, [sp, #16] ; dst stride
|
||||
|
||||
uxtab16 r5, r0, r4 ; a1+2 | a1+0
|
||||
uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1
|
||||
uxtab16 r7, r0, r6
|
||||
uxtab16 r6, r0, r6, ror #8
|
||||
usat16 r5, #8, r5
|
||||
usat16 r4, #8, r4
|
||||
usat16 r7, #8, r7
|
||||
usat16 r6, #8, r6
|
||||
orr r5, r5, r4, lsl #8
|
||||
orr r7, r7, r6, lsl #8
|
||||
ldr r4, [r1], r2
|
||||
str r5, [r3], r12
|
||||
ldr r6, [r1]
|
||||
str r7, [r3], r12
|
||||
|
||||
uxtab16 r5, r0, r4
|
||||
uxtab16 r4, r0, r4, ror #8
|
||||
uxtab16 r7, r0, r6
|
||||
uxtab16 r6, r0, r6, ror #8
|
||||
usat16 r5, #8, r5
|
||||
usat16 r4, #8, r4
|
||||
usat16 r7, #8, r7
|
||||
usat16 r6, #8, r6
|
||||
orr r5, r5, r4, lsl #8
|
||||
orr r7, r7, r6, lsl #8
|
||||
str r5, [r3], r12
|
||||
str r7, [r3]
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
bx lr
|
||||
|
||||
ENDP ; |vp8_dc_only_idct_add_v6|
|
||||
|
||||
; Constant Pool
|
||||
c0x0000FFFF DCD 0x0000FFFF
|
||||
END
|
|
@ -1,190 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_dequant_idct_add_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
;void vp8_dequant_idct_v6(short *input, short *dq,
|
||||
; unsigned char *dest, int stride)
|
||||
; r0 = q
|
||||
; r1 = dq
|
||||
; r2 = dst
|
||||
; r3 = stride
|
||||
|
||||
|vp8_dequant_idct_add_v6| PROC
|
||||
stmdb sp!, {r4-r11, lr}
|
||||
|
||||
ldr r4, [r0] ;input
|
||||
ldr r5, [r1], #4 ;dq
|
||||
|
||||
sub sp, sp, #4
|
||||
str r3, [sp]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
vp8_dequant_add_loop
|
||||
smulbb r6, r4, r5
|
||||
smultt r7, r4, r5
|
||||
|
||||
ldr r4, [r0, #4] ;input
|
||||
ldr r5, [r1], #4 ;dq
|
||||
|
||||
strh r6, [r0], #2
|
||||
strh r7, [r0], #2
|
||||
|
||||
smulbb r6, r4, r5
|
||||
smultt r7, r4, r5
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
ldrne r4, [r0, #4]
|
||||
ldrne r5, [r1], #4
|
||||
|
||||
strh r6, [r0], #2
|
||||
strh r7, [r0], #2
|
||||
|
||||
bne vp8_dequant_add_loop
|
||||
|
||||
sub r0, r0, #32
|
||||
mov r1, r0
|
||||
|
||||
; short_idct4x4llm_v6_dual
|
||||
ldr r3, cospi8sqrt2minus1
|
||||
ldr r4, sinpi8sqrt2
|
||||
ldr r6, [r0, #8]
|
||||
mov r5, #2
|
||||
vp8_dequant_idct_loop1_v6
|
||||
ldr r12, [r0, #24]
|
||||
ldr r14, [r0, #16]
|
||||
smulwt r9, r3, r6
|
||||
smulwb r7, r3, r6
|
||||
smulwt r10, r4, r6
|
||||
smulwb r8, r4, r6
|
||||
pkhbt r7, r7, r9, lsl #16
|
||||
smulwt r11, r3, r12
|
||||
pkhbt r8, r8, r10, lsl #16
|
||||
uadd16 r6, r6, r7
|
||||
smulwt r7, r4, r12
|
||||
smulwb r9, r3, r12
|
||||
smulwb r10, r4, r12
|
||||
subs r5, r5, #1
|
||||
pkhbt r9, r9, r11, lsl #16
|
||||
ldr r11, [r0], #4
|
||||
pkhbt r10, r10, r7, lsl #16
|
||||
uadd16 r7, r12, r9
|
||||
usub16 r7, r8, r7
|
||||
uadd16 r6, r6, r10
|
||||
uadd16 r10, r11, r14
|
||||
usub16 r8, r11, r14
|
||||
uadd16 r9, r10, r6
|
||||
usub16 r10, r10, r6
|
||||
uadd16 r6, r8, r7
|
||||
usub16 r7, r8, r7
|
||||
str r6, [r1, #8]
|
||||
ldrne r6, [r0, #8]
|
||||
str r7, [r1, #16]
|
||||
str r10, [r1, #24]
|
||||
str r9, [r1], #4
|
||||
bne vp8_dequant_idct_loop1_v6
|
||||
|
||||
mov r5, #2
|
||||
sub r0, r1, #8
|
||||
vp8_dequant_idct_loop2_v6
|
||||
ldr r6, [r0], #4
|
||||
ldr r7, [r0], #4
|
||||
ldr r8, [r0], #4
|
||||
ldr r9, [r0], #4
|
||||
smulwt r1, r3, r6
|
||||
smulwt r12, r4, r6
|
||||
smulwt lr, r3, r8
|
||||
smulwt r10, r4, r8
|
||||
pkhbt r11, r8, r6, lsl #16
|
||||
pkhbt r1, lr, r1, lsl #16
|
||||
pkhbt r12, r10, r12, lsl #16
|
||||
pkhtb r6, r6, r8, asr #16
|
||||
uadd16 r6, r1, r6
|
||||
pkhbt lr, r9, r7, lsl #16
|
||||
uadd16 r10, r11, lr
|
||||
usub16 lr, r11, lr
|
||||
pkhtb r8, r7, r9, asr #16
|
||||
subs r5, r5, #1
|
||||
smulwt r1, r3, r8
|
||||
smulwb r7, r3, r8
|
||||
smulwt r11, r4, r8
|
||||
smulwb r9, r4, r8
|
||||
pkhbt r1, r7, r1, lsl #16
|
||||
uadd16 r8, r1, r8
|
||||
pkhbt r11, r9, r11, lsl #16
|
||||
usub16 r1, r12, r8
|
||||
uadd16 r8, r11, r6
|
||||
ldr r9, c0x00040004
|
||||
ldr r12, [sp] ; get stride from stack
|
||||
uadd16 r6, r10, r8
|
||||
usub16 r7, r10, r8
|
||||
uadd16 r7, r7, r9
|
||||
uadd16 r6, r6, r9
|
||||
uadd16 r10, r14, r1
|
||||
usub16 r1, r14, r1
|
||||
uadd16 r10, r10, r9
|
||||
uadd16 r1, r1, r9
|
||||
ldr r11, [r2] ; load input from dst
|
||||
mov r8, r7, asr #3
|
||||
pkhtb r9, r8, r10, asr #19
|
||||
mov r8, r1, asr #3
|
||||
pkhtb r8, r8, r6, asr #19
|
||||
uxtb16 lr, r11, ror #8
|
||||
qadd16 r9, r9, lr
|
||||
uxtb16 lr, r11
|
||||
qadd16 r8, r8, lr
|
||||
usat16 r9, #8, r9
|
||||
usat16 r8, #8, r8
|
||||
orr r9, r8, r9, lsl #8
|
||||
ldr r11, [r2, r12] ; load input from dst
|
||||
mov r7, r7, lsl #16
|
||||
mov r1, r1, lsl #16
|
||||
mov r10, r10, lsl #16
|
||||
mov r6, r6, lsl #16
|
||||
mov r7, r7, asr #3
|
||||
pkhtb r7, r7, r10, asr #19
|
||||
mov r1, r1, asr #3
|
||||
pkhtb r1, r1, r6, asr #19
|
||||
uxtb16 r8, r11, ror #8
|
||||
qadd16 r7, r7, r8
|
||||
uxtb16 r8, r11
|
||||
qadd16 r1, r1, r8
|
||||
usat16 r7, #8, r7
|
||||
usat16 r1, #8, r1
|
||||
orr r1, r1, r7, lsl #8
|
||||
str r9, [r2], r12 ; store output to dst
|
||||
str r1, [r2], r12 ; store output to dst
|
||||
bne vp8_dequant_idct_loop2_v6
|
||||
|
||||
; memset
|
||||
sub r0, r0, #32
|
||||
add sp, sp, #4
|
||||
|
||||
mov r12, #0
|
||||
str r12, [r0]
|
||||
str r12, [r0, #4]
|
||||
str r12, [r0, #8]
|
||||
str r12, [r0, #12]
|
||||
str r12, [r0, #16]
|
||||
str r12, [r0, #20]
|
||||
str r12, [r0, #24]
|
||||
str r12, [r0, #28]
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_dequant_idct_add_v6|
|
||||
|
||||
; Constant Pool
|
||||
cospi8sqrt2minus1 DCD 0x00004E7B
|
||||
sinpi8sqrt2 DCD 0x00008A8C
|
||||
c0x00040004 DCD 0x00040004
|
||||
|
||||
END
|
|
@ -1,69 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_dequantize_b_loop_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------
|
||||
;void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
|
||||
; r0 short *Q,
|
||||
; r1 short *DQC
|
||||
; r2 short *DQ
|
||||
|vp8_dequantize_b_loop_v6| PROC
|
||||
stmdb sp!, {r4-r9, lr}
|
||||
|
||||
ldr r3, [r0] ;load Q
|
||||
ldr r4, [r1] ;load DQC
|
||||
ldr r5, [r0, #4]
|
||||
ldr r6, [r1, #4]
|
||||
|
||||
mov r12, #2 ;loop counter
|
||||
|
||||
dequant_loop
|
||||
smulbb r7, r3, r4 ;multiply
|
||||
smultt r8, r3, r4
|
||||
smulbb r9, r5, r6
|
||||
smultt lr, r5, r6
|
||||
|
||||
ldr r3, [r0, #8]
|
||||
ldr r4, [r1, #8]
|
||||
ldr r5, [r0, #12]
|
||||
ldr r6, [r1, #12]
|
||||
|
||||
strh r7, [r2], #2 ;store result
|
||||
smulbb r7, r3, r4 ;multiply
|
||||
strh r8, [r2], #2
|
||||
smultt r8, r3, r4
|
||||
strh r9, [r2], #2
|
||||
smulbb r9, r5, r6
|
||||
strh lr, [r2], #2
|
||||
smultt lr, r5, r6
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
add r0, r0, #16
|
||||
add r1, r1, #16
|
||||
|
||||
ldrne r3, [r0]
|
||||
strh r7, [r2], #2 ;store result
|
||||
ldrne r4, [r1]
|
||||
strh r8, [r2], #2
|
||||
ldrne r5, [r0, #4]
|
||||
strh r9, [r2], #2
|
||||
ldrne r6, [r1, #4]
|
||||
strh lr, [r2], #2
|
||||
|
||||
bne dequant_loop
|
||||
|
||||
ldmia sp!, {r4-r9, pc}
|
||||
ENDP ;|vp8_dequantize_b_loop_v6|
|
||||
|
||||
END
|
|
@ -1,624 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter4_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 short *output_ptr
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int output_width
|
||||
; stack unsigned int output_height
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; vp8_filter the input and put in the output array. Apply the 6 tap FIR filter with
|
||||
; the output being a 2 byte value and the intput being a 1 byte value.
|
||||
|vp8_filter_block2d_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; --------------------------
|
||||
; 16x16 version
|
||||
; -----------------------------
|
||||
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
add r4, r2, #18 ; preload next low
|
||||
pld [r0, r4]
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_16_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_16_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_16_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r11, r2, #34 ; adding back block width(=16)
|
||||
pld [r0, r11] ; preload next low
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_16_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; --------------------------
|
||||
; 8x8 version
|
||||
; -----------------------------
|
||||
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
add r4, r2, #10 ; preload next low
|
||||
pld [r0, r4]
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_8_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_8_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_8_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r11, r2, #18 ; adding back block width(=8)
|
||||
pld [r0, r11] ; preload next low
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_8_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;---------------------------------
|
||||
; r0 short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int output_pitch,
|
||||
; r3 unsigned int cnt,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #36] ; vp8_filter address
|
||||
sub sp, sp, #4
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
str r1, [sp] ; push destination to stack
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
pkhbt r12, r5, r4 ; pack the filter differently
|
||||
pkhbt r11, r6, r5
|
||||
|
||||
sub r0, r0, #4 ; offset input buffer
|
||||
|
||||
|height_loop_2nd|
|
||||
ldr r8, [r0] ; load the data
|
||||
ldr r9, [r0, #4]
|
||||
orr r7, r7, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_2nd|
|
||||
smuad lr, r4, r8 ; apply filter
|
||||
sub r7, r7, #1
|
||||
smulbt r8, r4, r8
|
||||
|
||||
ldr r10, [r0, #8]
|
||||
|
||||
smlad lr, r5, r9, lr
|
||||
smladx r8, r12, r9, r8
|
||||
|
||||
ldrh r9, [r0, #12]
|
||||
|
||||
smlad lr, r6, r10, lr
|
||||
smladx r8, r11, r10, r8
|
||||
|
||||
add r0, r0, #4
|
||||
smlatb r10, r6, r9, r8
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ands r8, r7, #0xff
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r1], r2 ; the result is transposed back and stored
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrne r8, [r0] ; load data for next loop
|
||||
ldrne r9, [r0, #4]
|
||||
strb r10, [r1], r2
|
||||
|
||||
bne width_loop_2nd
|
||||
|
||||
ldr r1, [sp] ; update dst for next loop
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #16 ; updata src for next loop
|
||||
add r1, r1, #1
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_2nd
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;---------------------------------
|
||||
; r0 short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int output_pitch,
|
||||
; r3 unsigned int cnt,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter4_block2d_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #36] ; vp8_filter address
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
add lr, r1, r3 ; save final destination pointer
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
pkhbt r12, r5, r4 ; pack the filter differently
|
||||
pkhbt r11, r6, r5
|
||||
mov r4, #0x40 ; rounding factor (for smlad{x})
|
||||
|
||||
|height_loop_2nd_4|
|
||||
ldrd r8, r9, [r0, #-4] ; load the data
|
||||
orr r7, r7, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_2nd_4|
|
||||
ldr r10, [r0, #4]!
|
||||
smladx r6, r9, r12, r4 ; apply filter
|
||||
pkhbt r8, r9, r8
|
||||
smlad r5, r8, r12, r4
|
||||
pkhbt r8, r10, r9
|
||||
smladx r6, r10, r11, r6
|
||||
sub r7, r7, #1
|
||||
smlad r5, r8, r11, r5
|
||||
|
||||
mov r8, r9 ; shift the data for the next loop
|
||||
mov r9, r10
|
||||
|
||||
usat r6, #8, r6, asr #7 ; shift and clamp
|
||||
usat r5, #8, r5, asr #7
|
||||
|
||||
strb r5, [r1], r2 ; the result is transposed back and stored
|
||||
tst r7, #0xff
|
||||
strb r6, [r1], r2
|
||||
|
||||
bne width_loop_2nd_4
|
||||
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #16 ; update src for next loop
|
||||
sub r1, lr, r7, lsr #16 ; update dst for next loop
|
||||
|
||||
bne height_loop_2nd_4
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;------------------------------------
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int cnt,
|
||||
; stack unsigned int output_pitch,
|
||||
; stack const short *vp8_filter
|
||||
;------------------------------------
|
||||
|vp8_filter_block2d_first_pass_only_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
add r7, r2, r3 ; preload next low
|
||||
add r7, r7, #2
|
||||
pld [r0, r7]
|
||||
|
||||
ldr r4, [sp, #36] ; output pitch
|
||||
ldr r11, [sp, #40] ; HFilter address
|
||||
sub sp, sp, #8
|
||||
|
||||
mov r7, r3
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
sub r4, r4, r3
|
||||
str r4, [sp] ; save modified output pitch
|
||||
str r2, [sp, #4]
|
||||
|
||||
mov r2, #0x40
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_only_6|
|
||||
ldrb r8, [r0, #-2] ; load data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
|
||||
mov r12, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_1st_only_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
;; smuad lr, lr, r4
|
||||
smlad lr, lr, r4, r2
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
;; smuad r8, r8, r4
|
||||
smlad r8, r8, r4, r2
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r10, r10, r6, r8
|
||||
|
||||
;; add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
;; add r10, r10, #0x40
|
||||
strb lr, [r1], #1 ; store the result
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrneb r9, [r0, #-1]
|
||||
strb r10, [r1], #1
|
||||
ldrneb r10, [r0], #2
|
||||
|
||||
bne width_loop_1st_only_6
|
||||
|
||||
ldr lr, [sp] ; load back output pitch
|
||||
ldr r12, [sp, #4] ; load back output pitch
|
||||
subs r7, r7, #1
|
||||
add r0, r0, r12 ; updata src for next loop
|
||||
|
||||
add r11, r12, r3 ; preload next low
|
||||
add r11, r11, #2
|
||||
pld [r0, r11]
|
||||
|
||||
add r1, r1, lr ; update dst for next loop
|
||||
|
||||
bne height_loop_1st_only_6
|
||||
|
||||
add sp, sp, #8
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_first_pass_only_armv6|
|
||||
|
||||
|
||||
;------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int cnt,
|
||||
; stack unsigned int output_pitch,
|
||||
; stack const short *vp8_filter
|
||||
;------------------------------------
|
||||
|vp8_filter_block2d_second_pass_only_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; VFilter address
|
||||
ldr r12, [sp, #36] ; output pitch
|
||||
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
sub r0, r0, r2, lsl #1 ; need 6 elements for filtering, 2 before, 3 after
|
||||
|
||||
sub sp, sp, #8
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r0, [sp] ; save r0 to stack
|
||||
str r1, [sp, #4] ; save dst to stack
|
||||
|
||||
; six tap filter
|
||||
|width_loop_2nd_only_6|
|
||||
ldrb r8, [r0], r2 ; load data
|
||||
orr r7, r7, r3 ; loop counter
|
||||
ldrb r9, [r0], r2
|
||||
ldrb r10, [r0], r2
|
||||
|
||||
|height_loop_2nd_only_6|
|
||||
; filter first column in this inner loop, than, move to next colum.
|
||||
ldrb r11, [r0], r2
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0], r2
|
||||
|
||||
smuad lr, lr, r4
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0], r2
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0]
|
||||
|
||||
sub r7, r7, #2
|
||||
sub r0, r0, r2, lsl #2
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r10, r10, r6, r8
|
||||
|
||||
ands r9, r7, #0xff
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0], r2 ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r1], r12 ; store the result for the column
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrneb r9, [r0], r2
|
||||
strb r10, [r1], r12
|
||||
ldrneb r10, [r0], r2
|
||||
|
||||
bne height_loop_2nd_only_6
|
||||
|
||||
ldr r0, [sp]
|
||||
ldr r1, [sp, #4]
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #1 ; move to filter next column
|
||||
str r0, [sp]
|
||||
add r1, r1, #1
|
||||
str r1, [sp, #4]
|
||||
|
||||
bne width_loop_2nd_only_6
|
||||
|
||||
add sp, sp, #8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
END
|
|
@ -1,115 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
|
||||
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
|
||||
unsigned char *dst,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dst, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dst+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dst+4, stride, dst+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+32, dq, dst+8, stride);
|
||||
else if (eobs[2] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[32]*dq[0], dst+8, stride, dst+8, stride);
|
||||
((int *)(q+32))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+48, dq, dst+12, stride);
|
||||
else if (eobs[3] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[48]*dq[0], dst+12, stride,dst+12,stride);
|
||||
((int *)(q+48))[0] = 0;
|
||||
}
|
||||
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dstu, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dstu+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstu+4, stride,
|
||||
dstu+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
eobs += 2;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dstv, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dstv+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstv+4, stride,
|
||||
dstv+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
eobs += 2;
|
||||
}
|
||||
}
|
|
@ -1,202 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_short_idct4x4llm_v6_dual|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
|
||||
; void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
|
||||
; unsigned char *dst, int stride)
|
||||
; r0 short* input
|
||||
; r1 unsigned char* pred
|
||||
; r2 int pitch
|
||||
; r3 unsigned char* dst
|
||||
; sp int stride
|
||||
|
||||
|vp8_short_idct4x4llm_v6_dual| PROC
|
||||
stmdb sp!, {r4-r11, lr}
|
||||
|
||||
sub sp, sp, #4
|
||||
|
||||
mov r4, #0x00008A00 ; sin
|
||||
orr r4, r4, #0x0000008C ; sinpi8sqrt2
|
||||
|
||||
mov r5, #0x00004E00 ; cos
|
||||
orr r5, r5, #0x0000007B ; cospi8sqrt2minus1
|
||||
orr r5, r5, #1<<31 ; loop counter on top bit
|
||||
|
||||
loop1_dual
|
||||
ldr r6, [r0, #(4*2)] ; i5 | i4
|
||||
ldr r12, [r0, #(12*2)] ; i13|i12
|
||||
ldr r14, [r0, #(8*2)] ; i9 | i8
|
||||
|
||||
smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
|
||||
smulbb r7, r5, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16
|
||||
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16
|
||||
smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16
|
||||
|
||||
smulbt r11, r5, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16
|
||||
pkhtb r7, r9, r7, asr #16 ; 5c | 4c
|
||||
pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
|
||||
uadd16 r6, r6, r7 ; 5c+5 | 4c+4
|
||||
|
||||
smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16
|
||||
smulbb r9, r5, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16
|
||||
smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16
|
||||
|
||||
subs r5, r5, #1<<31 ; i--
|
||||
|
||||
pkhtb r9, r11, r9, asr #16 ; 13c | 12c
|
||||
ldr r11, [r0] ; i1 | i0
|
||||
pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
|
||||
uadd16 r7, r12, r9 ; 13c+13 | 12c+12
|
||||
|
||||
usub16 r7, r8, r7 ; c
|
||||
uadd16 r6, r6, r10 ; d
|
||||
uadd16 r10, r11, r14 ; a
|
||||
usub16 r8, r11, r14 ; b
|
||||
|
||||
uadd16 r9, r10, r6 ; a+d
|
||||
usub16 r10, r10, r6 ; a-d
|
||||
uadd16 r6, r8, r7 ; b+c
|
||||
usub16 r7, r8, r7 ; b-c
|
||||
|
||||
; use input buffer to store intermediate results
|
||||
str r6, [r0, #(4*2)] ; o5 | o4
|
||||
str r7, [r0, #(8*2)] ; o9 | o8
|
||||
str r10,[r0, #(12*2)] ; o13|o12
|
||||
str r9, [r0], #4 ; o1 | o0
|
||||
|
||||
bcs loop1_dual
|
||||
|
||||
sub r0, r0, #8 ; reset input/output
|
||||
str r0, [sp]
|
||||
|
||||
loop2_dual
|
||||
|
||||
ldr r6, [r0, #(4*2)] ; i5 | i4
|
||||
ldr r12,[r0, #(2*2)] ; i3 | i2
|
||||
ldr r14,[r0, #(6*2)] ; i7 | i6
|
||||
ldr r0, [r0, #(0*2)] ; i1 | i0
|
||||
|
||||
smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
|
||||
smulbt r7, r5, r0 ; (ip[1] * cospi8sqrt2minus1) >> 16
|
||||
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16
|
||||
smulwt r8, r4, r0 ; (ip[1] * sinpi8sqrt2) >> 16
|
||||
|
||||
pkhbt r11, r6, r0, lsl #16 ; i0 | i4
|
||||
pkhtb r7, r7, r9, asr #16 ; 1c | 5c
|
||||
pkhtb r0, r0, r6, asr #16 ; i1 | i5
|
||||
pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1
|
||||
|
||||
uadd16 r0, r7, r0 ; 1c+1 | 5c+5 = temp2
|
||||
pkhbt r9, r14, r12, lsl #16 ; i2 | i6
|
||||
uadd16 r10, r11, r9 ; a
|
||||
usub16 r9, r11, r9 ; b
|
||||
pkhtb r6, r12, r14, asr #16 ; i3 | i7
|
||||
|
||||
subs r5, r5, #1<<31 ; i--
|
||||
|
||||
smulbt r7, r5, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16
|
||||
smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16
|
||||
smulbb r12, r5, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16
|
||||
smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16
|
||||
|
||||
pkhtb r7, r7, r12, asr #16 ; 3c | 7c
|
||||
pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1
|
||||
|
||||
uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2
|
||||
usub16 r12, r8, r6 ; c (o1 | o5)
|
||||
uadd16 r6, r11, r0 ; d (o3 | o7)
|
||||
uadd16 r7, r10, r6 ; a+d
|
||||
|
||||
mov r8, #4 ; set up 4's
|
||||
orr r8, r8, #0x40000 ; 4|4
|
||||
|
||||
usub16 r6, r10, r6 ; a-d
|
||||
uadd16 r6, r6, r8 ; a-d+4, 3|7
|
||||
uadd16 r7, r7, r8 ; a+d+4, 0|4
|
||||
uadd16 r10, r9, r12 ; b+c
|
||||
usub16 r0, r9, r12 ; b-c
|
||||
uadd16 r10, r10, r8 ; b+c+4, 1|5
|
||||
uadd16 r8, r0, r8 ; b-c+4, 2|6
|
||||
|
||||
ldr lr, [sp, #40] ; dst stride
|
||||
|
||||
ldrb r0, [r1] ; pred p0
|
||||
ldrb r11, [r1, #1] ; pred p1
|
||||
ldrb r12, [r1, #2] ; pred p2
|
||||
|
||||
add r0, r0, r7, asr #19 ; p0 + o0
|
||||
add r11, r11, r10, asr #19 ; p1 + o1
|
||||
add r12, r12, r8, asr #19 ; p2 + o2
|
||||
|
||||
usat r0, #8, r0 ; d0 = clip8(p0 + o0)
|
||||
usat r11, #8, r11 ; d1 = clip8(p1 + o1)
|
||||
usat r12, #8, r12 ; d2 = clip8(p2 + o2)
|
||||
|
||||
add r0, r0, r11, lsl #8 ; |--|--|d1|d0|
|
||||
|
||||
ldrb r11, [r1, #3] ; pred p3
|
||||
|
||||
add r0, r0, r12, lsl #16 ; |--|d2|d1|d0|
|
||||
|
||||
add r11, r11, r6, asr #19 ; p3 + o3
|
||||
|
||||
sxth r7, r7 ;
|
||||
sxth r10, r10 ;
|
||||
|
||||
usat r11, #8, r11 ; d3 = clip8(p3 + o3)
|
||||
|
||||
sxth r8, r8 ;
|
||||
sxth r6, r6 ;
|
||||
|
||||
add r0, r0, r11, lsl #24 ; |d3|d2|d1|d0|
|
||||
|
||||
ldrb r12, [r1, r2]! ; pred p4
|
||||
str r0, [r3], lr
|
||||
ldrb r11, [r1, #1] ; pred p5
|
||||
|
||||
add r12, r12, r7, asr #3 ; p4 + o4
|
||||
add r11, r11, r10, asr #3 ; p5 + o5
|
||||
|
||||
usat r12, #8, r12 ; d4 = clip8(p4 + o4)
|
||||
usat r11, #8, r11 ; d5 = clip8(p5 + o5)
|
||||
|
||||
ldrb r7, [r1, #2] ; pred p6
|
||||
ldrb r10, [r1, #3] ; pred p6
|
||||
|
||||
add r12, r12, r11, lsl #8 ; |--|--|d5|d4|
|
||||
|
||||
add r7, r7, r8, asr #3 ; p6 + o6
|
||||
add r10, r10, r6, asr #3 ; p7 + o7
|
||||
|
||||
ldr r0, [sp] ; load input pointer
|
||||
|
||||
usat r7, #8, r7 ; d6 = clip8(p6 + o6)
|
||||
usat r10, #8, r10 ; d7 = clip8(p7 + o7)
|
||||
|
||||
add r12, r12, r7, lsl #16 ; |--|d6|d5|d4|
|
||||
add r12, r12, r10, lsl #24 ; |d7|d6|d5|d4|
|
||||
|
||||
str r12, [r3], lr
|
||||
add r0, r0, #16
|
||||
add r1, r1, r2 ; pred + pitch
|
||||
|
||||
bcs loop2_dual
|
||||
|
||||
add sp, sp, #4 ; idct_output buffer
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
|
@ -1,136 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_short_inv_walsh4x4_v6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;short vp8_short_inv_walsh4x4_v6(short *input, short *mb_dqcoeff)
|
||||
|vp8_short_inv_walsh4x4_v6| PROC
|
||||
|
||||
stmdb sp!, {r4 - r12, lr}
|
||||
|
||||
ldr r2, [r0, #0] ; [1 | 0]
|
||||
ldr r3, [r0, #4] ; [3 | 2]
|
||||
ldr r4, [r0, #8] ; [5 | 4]
|
||||
ldr r5, [r0, #12] ; [7 | 6]
|
||||
ldr r6, [r0, #16] ; [9 | 8]
|
||||
ldr r7, [r0, #20] ; [11 | 10]
|
||||
ldr r8, [r0, #24] ; [13 | 12]
|
||||
ldr r9, [r0, #28] ; [15 | 14]
|
||||
|
||||
qadd16 r10, r2, r8 ; a1 [1+13 | 0+12]
|
||||
qadd16 r11, r4, r6 ; b1 [5+9 | 4+8]
|
||||
qsub16 r12, r4, r6 ; c1 [5-9 | 4-8]
|
||||
qsub16 lr, r2, r8 ; d1 [1-13 | 0-12]
|
||||
|
||||
qadd16 r2, r10, r11 ; a1 + b1 [1 | 0]
|
||||
qadd16 r4, r12, lr ; c1 + d1 [5 | 4]
|
||||
qsub16 r6, r10, r11 ; a1 - b1 [9 | 8]
|
||||
qsub16 r8, lr, r12 ; d1 - c1 [13 | 12]
|
||||
|
||||
qadd16 r10, r3, r9 ; a1 [3+15 | 2+14]
|
||||
qadd16 r11, r5, r7 ; b1 [7+11 | 6+10]
|
||||
qsub16 r12, r5, r7 ; c1 [7-11 | 6-10]
|
||||
qsub16 lr, r3, r9 ; d1 [3-15 | 2-14]
|
||||
|
||||
qadd16 r3, r10, r11 ; a1 + b1 [3 | 2]
|
||||
qadd16 r5, r12, lr ; c1 + d1 [7 | 6]
|
||||
qsub16 r7, r10, r11 ; a1 - b1 [11 | 10]
|
||||
qsub16 r9, lr, r12 ; d1 - c1 [15 | 14]
|
||||
|
||||
; first transform complete
|
||||
|
||||
qsubaddx r10, r2, r3 ; [c1|a1] [1-2 | 0+3]
|
||||
qaddsubx r11, r2, r3 ; [b1|d1] [1+2 | 0-3]
|
||||
qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7]
|
||||
qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7]
|
||||
|
||||
qaddsubx r2, r10, r11 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r3, r11, r10 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
ldr r10, c0x00030003
|
||||
qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r2, r2, r10 ; [b2+3|c2+3]
|
||||
qadd16 r3, r3, r10 ; [a2+3|d2+3]
|
||||
qadd16 r4, r4, r10 ; [b2+3|c2+3]
|
||||
qadd16 r5, r5, r10 ; [a2+3|d2+3]
|
||||
|
||||
asr r12, r3, #19 ; [0]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r2, #19 ; [1]
|
||||
strh lr, [r1], #32
|
||||
sxth r2, r2
|
||||
sxth r3, r3
|
||||
asr r2, r2, #3 ; [2]
|
||||
strh r2, [r1], #32
|
||||
asr r3, r3, #3 ; [3]
|
||||
strh r3, [r1], #32
|
||||
|
||||
asr r12, r5, #19 ; [4]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r4, #19 ; [5]
|
||||
strh lr, [r1], #32
|
||||
sxth r4, r4
|
||||
sxth r5, r5
|
||||
asr r4, r4, #3 ; [6]
|
||||
strh r4, [r1], #32
|
||||
asr r5, r5, #3 ; [7]
|
||||
strh r5, [r1], #32
|
||||
|
||||
qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11]
|
||||
qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11]
|
||||
qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
|
||||
qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
|
||||
|
||||
qaddsubx r6, r2, r3 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r6, r6, r10 ; [b2+3|c2+3]
|
||||
qadd16 r7, r7, r10 ; [a2+3|d2+3]
|
||||
qadd16 r8, r8, r10 ; [b2+3|c2+3]
|
||||
qadd16 r9, r9, r10 ; [a2+3|d2+3]
|
||||
|
||||
asr r12, r7, #19 ; [8]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r6, #19 ; [9]
|
||||
strh lr, [r1], #32
|
||||
sxth r6, r6
|
||||
sxth r7, r7
|
||||
asr r6, r6, #3 ; [10]
|
||||
strh r6, [r1], #32
|
||||
asr r7, r7, #3 ; [11]
|
||||
strh r7, [r1], #32
|
||||
|
||||
asr r12, r9, #19 ; [12]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r8, #19 ; [13]
|
||||
strh lr, [r1], #32
|
||||
sxth r8, r8
|
||||
sxth r9, r9
|
||||
asr r8, r8, #3 ; [14]
|
||||
strh r8, [r1], #32
|
||||
asr r9, r9, #3 ; [15]
|
||||
strh r9, [r1], #32
|
||||
|
||||
ldmia sp!, {r4 - r12, pc}
|
||||
ENDP ; |vp8_short_inv_walsh4x4_v6|
|
||||
|
||||
|
||||
; Constant Pool
|
||||
c0x00030003 DCD 0x00030003
|
||||
END
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,286 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
EXPORT |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
MACRO
|
||||
TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
|
||||
; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
|
||||
; a0: 03 02 01 00
|
||||
; a1: 13 12 11 10
|
||||
; a2: 23 22 21 20
|
||||
; a3: 33 32 31 30
|
||||
; b3 b2 b1 b0
|
||||
|
||||
uxtb16 $b1, $a1 ; xx 12 xx 10
|
||||
uxtb16 $b0, $a0 ; xx 02 xx 00
|
||||
uxtb16 $b3, $a3 ; xx 32 xx 30
|
||||
uxtb16 $b2, $a2 ; xx 22 xx 20
|
||||
orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00
|
||||
orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20
|
||||
|
||||
uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11
|
||||
uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31
|
||||
uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01
|
||||
uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21
|
||||
orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01
|
||||
orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21
|
||||
|
||||
pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- p1
|
||||
pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- p3
|
||||
|
||||
pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- p0
|
||||
pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- p2
|
||||
MEND
|
||||
|
||||
|
||||
|
||||
src RN r0
|
||||
pstep RN r1
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *blimit
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldrb r12, [r2] ; blimit
|
||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||
ldr r4, [src, -pstep] ; p0
|
||||
ldr r5, [src] ; q0
|
||||
ldr r6, [src, pstep] ; q1
|
||||
orr r12, r12, r12, lsl #8 ; blimit
|
||||
ldr r2, c0x80808080
|
||||
orr r12, r12, r12, lsl #16 ; blimit
|
||||
mov r9, #4 ; double the count. we're doing 4 at a time
|
||||
mov lr, #0 ; need 0 in a couple places
|
||||
|
||||
|simple_hnext8|
|
||||
; vp8_simple_filter_mask()
|
||||
|
||||
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r10, r4, r5 ; p0 - q0
|
||||
uqsub8 r11, r5, r4 ; q0 - p0
|
||||
orr r8, r8, r7 ; abs(p1 - q1)
|
||||
orr r10, r10, r11 ; abs(p0 - q0)
|
||||
uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2
|
||||
uhadd8 r8, r8, lr ; abs(p1 - q2) >> 1
|
||||
uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r8, #0
|
||||
usub8 r10, r12, r10 ; compare to flimit. usub8 sets GE flags
|
||||
sel r10, r8, lr ; filter mask: F or 0
|
||||
cmp r10, #0
|
||||
beq simple_hskip_filter ; skip filtering if all masks are 0x00
|
||||
|
||||
;vp8_simple_filter()
|
||||
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r8, c0x03030303
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, r10 ; vp8_filter &= mask
|
||||
|
||||
qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3
|
||||
|
||||
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr ; Filter1 >>= 3
|
||||
shadd8 r8 , r8 , lr ; Filter2 >>= 3
|
||||
|
||||
qsub8 r5 ,r5, r7 ; u = q0 - Filter1
|
||||
qadd8 r4, r4, r8 ; u = p0 + Filter2
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
str r5, [src] ; store oq0 result
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
str r4, [src, -pstep] ; store op0 result
|
||||
|
||||
|simple_hskip_filter|
|
||||
subs r9, r9, #1
|
||||
addne src, src, #4 ; next row
|
||||
|
||||
ldrne r3, [src, -pstep, lsl #1] ; p1
|
||||
ldrne r4, [src, -pstep] ; p0
|
||||
ldrne r5, [src] ; q0
|
||||
ldrne r6, [src, pstep] ; q1
|
||||
|
||||
bne simple_hnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldrb r12, [r2] ; r12: blimit
|
||||
ldr r2, c0x80808080
|
||||
orr r12, r12, r12, lsl #8
|
||||
|
||||
; load soure data to r7, r8, r9, r10
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrh r4, [src], pstep
|
||||
orr r12, r12, r12, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
mov r11, #4 ; double the count. we're doing 4 at a time
|
||||
|
||||
|simple_vnext8|
|
||||
; vp8_simple_filter_mask() function
|
||||
pkhbt r9, r3, r4, lsl #16
|
||||
pkhbt r10, r5, r6, lsl #16
|
||||
|
||||
;transpose r7, r8, r9, r10 to r3, r4, r5, r6
|
||||
TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
|
||||
|
||||
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r9, r4, r5 ; p0 - q0
|
||||
uqsub8 r10, r5, r4 ; q0 - p0
|
||||
orr r7, r7, r8 ; abs(p1 - q1)
|
||||
orr r9, r9, r10 ; abs(p0 - q0)
|
||||
mov r8, #0
|
||||
uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
|
||||
uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
|
||||
uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r10, #0 ; r10 == -1
|
||||
|
||||
usub8 r7, r12, r7 ; compare to flimit
|
||||
sel lr, r10, r8 ; filter mask
|
||||
|
||||
cmp lr, #0
|
||||
beq simple_vskip_filter ; skip filtering
|
||||
|
||||
;vp8_simple_filter() function
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r9, c0x03030303 ; r9 = 3
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, lr ; vp8_filter &= mask
|
||||
|
||||
qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
|
||||
qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
|
||||
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8 ; Filter2 >>= 3
|
||||
shadd8 r3 , r3 , r8 ; Filter1 >>= 3
|
||||
|
||||
;calculate output
|
||||
sub src, src, pstep, lsl #2
|
||||
|
||||
qadd8 r4, r4, r9 ; u = p0 + Filter2
|
||||
qsub8 r5, r5, r3 ; u = q0 - Filter1
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
|
||||
strb r4, [src, #-1] ; store the result
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
strb r5, [src], pstep
|
||||
|
||||
|simple_vskip_filter|
|
||||
subs r11, r11, #1
|
||||
|
||||
; load soure data to r7, r8, r9, r10
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
bne simple_vnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
; Constant Pool
|
||||
c0x80808080 DCD 0x80808080
|
||||
c0x03030303 DCD 0x03030303
|
||||
c0x04040404 DCD 0x04040404
|
||||
|
||||
END
|
|
@ -1,273 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict8x4_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; stack unsigned char *dst_ptr,
|
||||
; stack int dst_pitch
|
||||
;-------------------------------------
|
||||
;note: In first pass, store the result in transpose(8linesx9columns) on stack. Temporary stack size is 184.
|
||||
;Line width is 20 that is 9 short data plus 2 to make it 4bytes aligned. In second pass, load data from stack,
|
||||
;and the result is stored in transpose.
|
||||
|vp8_sixtap_predict8x4_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
add lr, sp, #4 ;point to temporary buffer
|
||||
beq skip_firstpass_filter
|
||||
|
||||
;first-pass filter
|
||||
adr r12, filter8_coeff
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
add r3, r1, #10 ; preload next low
|
||||
pld [r0, r3]
|
||||
|
||||
add r2, r12, r2, lsl #4 ;calculate filter location
|
||||
add r0, r0, #3 ;adjust src only for loading convinience
|
||||
|
||||
ldr r3, [r2] ; load up packed filter coefficients
|
||||
ldr r4, [r2, #4]
|
||||
ldr r5, [r2, #8]
|
||||
|
||||
mov r2, #0x90000 ; height=9 is top part of counter
|
||||
|
||||
sub r1, r1, #8
|
||||
|
||||
|first_pass_hloop_v6|
|
||||
ldrb r6, [r0, #-5] ; load source data
|
||||
ldrb r7, [r0, #-4]
|
||||
ldrb r8, [r0, #-3]
|
||||
ldrb r9, [r0, #-2]
|
||||
ldrb r10, [r0, #-1]
|
||||
|
||||
orr r2, r2, #0x4 ; construct loop counter. width=8=4x2
|
||||
|
||||
pkhbt r6, r6, r7, lsl #16 ; r7 | r6
|
||||
pkhbt r7, r7, r8, lsl #16 ; r8 | r7
|
||||
|
||||
pkhbt r8, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
|first_pass_wloop_v6|
|
||||
smuad r11, r6, r3 ; vp8_filter[0], vp8_filter[1]
|
||||
smuad r12, r7, r3
|
||||
|
||||
ldrb r6, [r0], #1
|
||||
|
||||
smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3]
|
||||
ldrb r7, [r0], #1
|
||||
smlad r12, r9, r4, r12
|
||||
|
||||
pkhbt r10, r10, r6, lsl #16 ; r10 | r9
|
||||
pkhbt r6, r6, r7, lsl #16 ; r11 | r10
|
||||
smlad r11, r10, r5, r11 ; vp8_filter[4], vp8_filter[5]
|
||||
smlad r12, r6, r5, r12
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add r11, r11, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff ; test loop counter
|
||||
usat r11, #8, r11, asr #7
|
||||
add r12, r12, #0x40
|
||||
strh r11, [lr], #20 ; result is transposed and stored, which
|
||||
usat r12, #8, r12, asr #7
|
||||
|
||||
strh r12, [lr], #20
|
||||
|
||||
movne r11, r6
|
||||
movne r12, r7
|
||||
|
||||
movne r6, r8
|
||||
movne r7, r9
|
||||
movne r8, r10
|
||||
movne r9, r11
|
||||
movne r10, r12
|
||||
|
||||
bne first_pass_wloop_v6
|
||||
|
||||
;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [src, ppl]
|
||||
;;pld [src, r9]
|
||||
;;ENDIF
|
||||
|
||||
subs r2, r2, #0x10000
|
||||
|
||||
sub lr, lr, #158
|
||||
|
||||
add r0, r0, r1 ; move to next input line
|
||||
|
||||
add r11, r1, #18 ; preload next low. adding back block width(=8), which is subtracted earlier
|
||||
pld [r0, r11]
|
||||
|
||||
bne first_pass_hloop_v6
|
||||
|
||||
;second pass filter
|
||||
secondpass_filter
|
||||
ldr r3, [sp], #4 ; load back yoffset
|
||||
ldr r0, [sp, #216] ; load dst address from stack 180+36
|
||||
ldr r1, [sp, #220] ; load dst stride from stack 180+40
|
||||
|
||||
cmp r3, #0
|
||||
beq skip_secondpass_filter
|
||||
|
||||
adr r12, filter8_coeff
|
||||
add lr, r12, r3, lsl #4 ;calculate filter location
|
||||
|
||||
mov r2, #0x00080000
|
||||
|
||||
ldr r3, [lr] ; load up packed filter coefficients
|
||||
ldr r4, [lr, #4]
|
||||
ldr r5, [lr, #8]
|
||||
|
||||
pkhbt r12, r4, r3 ; pack the filter differently
|
||||
pkhbt r11, r5, r4
|
||||
|
||||
second_pass_hloop_v6
|
||||
ldr r6, [sp] ; load the data
|
||||
ldr r7, [sp, #4]
|
||||
|
||||
orr r2, r2, #2 ; loop counter
|
||||
|
||||
second_pass_wloop_v6
|
||||
smuad lr, r3, r6 ; apply filter
|
||||
smulbt r10, r3, r6
|
||||
|
||||
ldr r8, [sp, #8]
|
||||
|
||||
smlad lr, r4, r7, lr
|
||||
smladx r10, r12, r7, r10
|
||||
|
||||
ldrh r9, [sp, #12]
|
||||
|
||||
smlad lr, r5, r8, lr
|
||||
smladx r10, r11, r8, r10
|
||||
|
||||
add sp, sp, #4
|
||||
smlatb r10, r5, r9, r10
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r0], r1 ; the result is transposed back and stored
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
strb r10, [r0],r1
|
||||
|
||||
movne r6, r7
|
||||
movne r7, r8
|
||||
|
||||
bne second_pass_wloop_v6
|
||||
|
||||
subs r2, r2, #0x10000
|
||||
add sp, sp, #12 ; updata src for next loop (20-8)
|
||||
sub r0, r0, r1, lsl #2
|
||||
add r0, r0, #1
|
||||
|
||||
bne second_pass_hloop_v6
|
||||
|
||||
add sp, sp, #20
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
;--------------------
|
||||
skip_firstpass_filter
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r1, r1, #8
|
||||
mov r2, #9
|
||||
|
||||
skip_firstpass_hloop
|
||||
ldrb r4, [r0], #1 ; load data
|
||||
subs r2, r2, #1
|
||||
ldrb r5, [r0], #1
|
||||
strh r4, [lr], #20 ; store it to immediate buffer
|
||||
ldrb r6, [r0], #1 ; load data
|
||||
strh r5, [lr], #20
|
||||
ldrb r7, [r0], #1
|
||||
strh r6, [lr], #20
|
||||
ldrb r8, [r0], #1
|
||||
strh r7, [lr], #20
|
||||
ldrb r9, [r0], #1
|
||||
strh r8, [lr], #20
|
||||
ldrb r10, [r0], #1
|
||||
strh r9, [lr], #20
|
||||
ldrb r11, [r0], #1
|
||||
strh r10, [lr], #20
|
||||
add r0, r0, r1 ; move to next input line
|
||||
strh r11, [lr], #20
|
||||
|
||||
sub lr, lr, #158 ; move over to next column
|
||||
bne skip_firstpass_hloop
|
||||
|
||||
b secondpass_filter
|
||||
|
||||
;--------------------
|
||||
skip_secondpass_filter
|
||||
mov r2, #8
|
||||
add sp, sp, #4 ;start from src[0] instead of src[-2]
|
||||
|
||||
skip_secondpass_hloop
|
||||
ldr r6, [sp], #4
|
||||
subs r2, r2, #1
|
||||
ldr r8, [sp], #4
|
||||
|
||||
mov r7, r6, lsr #16 ; unpack
|
||||
strb r6, [r0], r1
|
||||
mov r9, r8, lsr #16
|
||||
strb r7, [r0], r1
|
||||
add sp, sp, #12 ; 20-8
|
||||
strb r8, [r0], r1
|
||||
strb r9, [r0], r1
|
||||
|
||||
sub r0, r0, r1, lsl #2
|
||||
add r0, r0, #1
|
||||
|
||||
bne skip_secondpass_hloop
|
||||
|
||||
add sp, sp, #16 ; 180 - (160 +4)
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
filter8_coeff
|
||||
DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000
|
||||
DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000
|
||||
DCD 0xfff50002, 0x0024006c, 0x0001fff8, 0x00000000
|
||||
DCD 0xfff70000, 0x0032005d, 0x0000fffa, 0x00000000
|
||||
DCD 0xfff00003, 0x004d004d, 0x0003fff0, 0x00000000
|
||||
DCD 0xfffa0000, 0x005d0032, 0x0000fff7, 0x00000000
|
||||
DCD 0xfff80001, 0x006c0024, 0x0002fff5, 0x00000000
|
||||
DCD 0xffff0000, 0x007b000c, 0x0000fffa, 0x00000000
|
||||
|
||||
;DCD 0, 0, 128, 0, 0, 0
|
||||
;DCD 0, -6, 123, 12, -1, 0
|
||||
;DCD 2, -11, 108, 36, -8, 1
|
||||
;DCD 0, -9, 93, 50, -6, 0
|
||||
;DCD 3, -16, 77, 77, -16, 3
|
||||
;DCD 0, -6, 50, 93, -9, 0
|
||||
;DCD 1, -8, 36, 108, -11, 2
|
||||
;DCD 0, -1, 12, 123, -6, 0
|
||||
|
||||
END
|
|
@ -1,113 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include <math.h>
|
||||
#include "vp8/common/filter.h"
|
||||
#include "bilinearfilter_arm.h"
|
||||
|
||||
void vp8_filter_block2d_bil_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
||||
#define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
|
@ -1,25 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#if HAVE_MEDIA
|
||||
extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
|
||||
|
||||
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
|
||||
{
|
||||
short *DQ = d->dqcoeff;
|
||||
short *Q = d->qcoeff;
|
||||
|
||||
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
|
||||
}
|
||||
#endif
|
|
@ -1,221 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include <math.h>
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 8x8
|
||||
extern void vp8_filter_block2d_first_pass_8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 16x16
|
||||
extern void vp8_filter_block2d_first_pass_16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter4_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if HAVE_MEDIA
|
||||
void vp8_sixtap_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
|
||||
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Vfilter is a 4 tap filter */
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
/* Vfilter is 6 tap filter */
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
|
@ -1,181 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
|
||||
#if HAVE_MEDIA
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh);
|
||||
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh,
|
||||
unsigned char *v);
|
||||
|
||||
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
|
||||
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
|
||||
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
#endif
|
||||
|
||||
#if HAVE_MEDIA
|
||||
/* ARMV6/MEDIA loopfilter functions*/
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
/* NEON loopfilter functions */
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
|
||||
}
|
||||
#endif
|
|
@ -1,699 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const uint8_t bifilter4_coeff[8][2] = {
|
||||
{128, 0},
|
||||
{112, 16},
|
||||
{ 96, 32},
|
||||
{ 80, 48},
|
||||
{ 64, 64},
|
||||
{ 48, 80},
|
||||
{ 32, 96},
|
||||
{ 16, 112}
|
||||
};
|
||||
|
||||
void vp8_bilinear_predict4x4_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8;
|
||||
uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8;
|
||||
uint8x16_t q1u8, q2u8;
|
||||
uint16x8_t q1u16, q2u16;
|
||||
uint16x8_t q7u16, q8u16, q9u16;
|
||||
uint64x2_t q4u64, q5u64;
|
||||
uint64x1_t d12u64;
|
||||
uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2;
|
||||
|
||||
if (xoffset == 0) { // skip_1stpass_filter
|
||||
uint32x2_t d28u32 = vdup_n_u32(0);
|
||||
uint32x2_t d29u32 = vdup_n_u32(0);
|
||||
uint32x2_t d30u32 = vdup_n_u32(0);
|
||||
|
||||
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0);
|
||||
d28u8 = vreinterpret_u8_u32(d28u32);
|
||||
d29u8 = vreinterpret_u8_u32(d29u32);
|
||||
d30u8 = vreinterpret_u8_u32(d30u32);
|
||||
} else {
|
||||
d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d6u8 = vld1_u8(src_ptr);
|
||||
|
||||
q1u8 = vcombine_u8(d2u8, d3u8);
|
||||
q2u8 = vcombine_u8(d4u8, d5u8);
|
||||
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
|
||||
q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
|
||||
d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
|
||||
vreinterpret_u32_u8(vget_high_u8(q1u8)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
|
||||
vreinterpret_u32_u8(vget_high_u8(q2u8)));
|
||||
d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
|
||||
vreinterpret_u32_u64(vget_high_u64(q4u64)));
|
||||
d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),
|
||||
vreinterpret_u32_u64(vget_high_u64(q5u64)));
|
||||
|
||||
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
|
||||
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
|
||||
q9u16 = vmull_u8(d6u8, d0u8);
|
||||
|
||||
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8);
|
||||
|
||||
d28u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d29u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d30u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
}
|
||||
|
||||
// secondpass_filter
|
||||
if (yoffset == 0) { // skip_2ndpass_filter
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1);
|
||||
} else {
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
q1u16 = vmull_u8(d28u8, d0u8);
|
||||
q2u16 = vmull_u8(d29u8, d0u8);
|
||||
|
||||
d26u8 = vext_u8(d28u8, d29u8, 4);
|
||||
d27u8 = vext_u8(d29u8, d30u8, 4);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, d26u8, d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, d27u8, d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8;
|
||||
uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8;
|
||||
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
|
||||
uint16x8_t q1u16, q2u16, q3u16, q4u16;
|
||||
uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
|
||||
|
||||
if (xoffset == 0) { // skip_1stpass_filter
|
||||
d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d26u8 = vld1_u8(src_ptr);
|
||||
} else {
|
||||
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q5u8 = vld1q_u8(src_ptr);
|
||||
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
|
||||
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
|
||||
q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
|
||||
|
||||
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
|
||||
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
|
||||
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
|
||||
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
|
||||
d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
|
||||
|
||||
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
|
||||
q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
|
||||
|
||||
d22u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d23u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d24u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d25u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d26u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
}
|
||||
|
||||
// secondpass_filter
|
||||
if (yoffset == 0) { // skip_2ndpass_filter
|
||||
vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d25u8);
|
||||
} else {
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
q1u16 = vmull_u8(d22u8, d0u8);
|
||||
q2u16 = vmull_u8(d23u8, d0u8);
|
||||
q3u16 = vmull_u8(d24u8, d0u8);
|
||||
q4u16 = vmull_u8(d25u8, d0u8);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d4u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d5u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
|
||||
vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d5u8);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8;
|
||||
uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8;
|
||||
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
|
||||
uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16;
|
||||
uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
|
||||
|
||||
if (xoffset == 0) { // skip_1stpass_filter
|
||||
d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d30u8 = vld1_u8(src_ptr);
|
||||
} else {
|
||||
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
|
||||
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
|
||||
|
||||
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
|
||||
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
|
||||
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
|
||||
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
|
||||
|
||||
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
|
||||
|
||||
d22u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d23u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d24u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d25u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
|
||||
// first_pass filtering on the rest 5-line data
|
||||
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q5u8 = vld1q_u8(src_ptr);
|
||||
|
||||
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
|
||||
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
|
||||
q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
|
||||
|
||||
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
|
||||
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
|
||||
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
|
||||
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
|
||||
d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
|
||||
|
||||
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
|
||||
q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
|
||||
|
||||
d26u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d27u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d28u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d29u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d30u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
}
|
||||
|
||||
// secondpass_filter
|
||||
if (yoffset == 0) { // skip_2ndpass_filter
|
||||
vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d29u8);
|
||||
} else {
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
q1u16 = vmull_u8(d22u8, d0u8);
|
||||
q2u16 = vmull_u8(d23u8, d0u8);
|
||||
q3u16 = vmull_u8(d24u8, d0u8);
|
||||
q4u16 = vmull_u8(d25u8, d0u8);
|
||||
q5u16 = vmull_u8(d26u8, d0u8);
|
||||
q6u16 = vmull_u8(d27u8, d0u8);
|
||||
q7u16 = vmull_u8(d28u8, d0u8);
|
||||
q8u16 = vmull_u8(d29u8, d0u8);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
|
||||
q5u16 = vmlal_u8(q5u16, d27u8, d1u8);
|
||||
q6u16 = vmlal_u8(q6u16, d28u8, d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, d29u8, d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, d30u8, d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d4u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d5u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
d6u8 = vqrshrn_n_u16(q5u16, 7);
|
||||
d7u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d8u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d9u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
|
||||
vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d9u8);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
int i;
|
||||
unsigned char tmp[272];
|
||||
unsigned char *tmpp;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
|
||||
uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
|
||||
uint8x8_t d19u8, d20u8, d21u8;
|
||||
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
|
||||
uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16;
|
||||
uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16;
|
||||
|
||||
if (xoffset == 0) { // secondpass_bfilter16x16_only
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
q11u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += src_pixels_per_line;
|
||||
for (i = 4; i > 0; i--) {
|
||||
q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
|
||||
q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
|
||||
q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
|
||||
q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
|
||||
q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
|
||||
q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
|
||||
q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
|
||||
q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
|
||||
q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d4u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d5u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
d6u8 = vqrshrn_n_u16(q5u16, 7);
|
||||
d7u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d8u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d9u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
|
||||
q1u8 = vcombine_u8(d2u8, d3u8);
|
||||
q2u8 = vcombine_u8(d4u8, d5u8);
|
||||
q3u8 = vcombine_u8(d6u8, d7u8);
|
||||
q4u8 = vcombine_u8(d8u8, d9u8);
|
||||
|
||||
q11u8 = q15u8;
|
||||
|
||||
vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (yoffset == 0) { // firstpass_bfilter16x16_only
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
for (i = 4; i > 0 ; i--) {
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
d3u8 = vld1_u8(src_ptr + 8);
|
||||
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr);
|
||||
d6u8 = vld1_u8(src_ptr + 8);
|
||||
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d8u8 = vld1_u8(src_ptr);
|
||||
d9u8 = vld1_u8(src_ptr + 8);
|
||||
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d11u8 = vld1_u8(src_ptr);
|
||||
d12u8 = vld1_u8(src_ptr + 8);
|
||||
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
q7u16 = vmull_u8(d2u8, d0u8);
|
||||
q8u16 = vmull_u8(d3u8, d0u8);
|
||||
q9u16 = vmull_u8(d5u8, d0u8);
|
||||
q10u16 = vmull_u8(d6u8, d0u8);
|
||||
q11u16 = vmull_u8(d8u8, d0u8);
|
||||
q12u16 = vmull_u8(d9u8, d0u8);
|
||||
q13u16 = vmull_u8(d11u8, d0u8);
|
||||
q14u16 = vmull_u8(d12u8, d0u8);
|
||||
|
||||
d2u8 = vext_u8(d2u8, d3u8, 1);
|
||||
d5u8 = vext_u8(d5u8, d6u8, 1);
|
||||
d8u8 = vext_u8(d8u8, d9u8, 1);
|
||||
d11u8 = vext_u8(d11u8, d12u8, 1);
|
||||
|
||||
q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
|
||||
q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
|
||||
q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
|
||||
|
||||
d3u8 = vext_u8(d3u8, d4u8, 1);
|
||||
d6u8 = vext_u8(d6u8, d7u8, 1);
|
||||
d9u8 = vext_u8(d9u8, d10u8, 1);
|
||||
d12u8 = vext_u8(d12u8, d13u8, 1);
|
||||
|
||||
q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
|
||||
q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
|
||||
q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
|
||||
q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
|
||||
|
||||
d14u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d15u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d16u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d17u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
d18u8 = vqrshrn_n_u16(q11u16, 7);
|
||||
d19u8 = vqrshrn_n_u16(q12u16, 7);
|
||||
d20u8 = vqrshrn_n_u16(q13u16, 7);
|
||||
d21u8 = vqrshrn_n_u16(q14u16, 7);
|
||||
|
||||
q7u8 = vcombine_u8(d14u8, d15u8);
|
||||
q8u8 = vcombine_u8(d16u8, d17u8);
|
||||
q9u8 = vcombine_u8(d18u8, d19u8);
|
||||
q10u8 =vcombine_u8(d20u8, d21u8);
|
||||
|
||||
vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
d3u8 = vld1_u8(src_ptr + 8);
|
||||
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr);
|
||||
d6u8 = vld1_u8(src_ptr + 8);
|
||||
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d8u8 = vld1_u8(src_ptr);
|
||||
d9u8 = vld1_u8(src_ptr + 8);
|
||||
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d11u8 = vld1_u8(src_ptr);
|
||||
d12u8 = vld1_u8(src_ptr + 8);
|
||||
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
// First Pass: output_height lines x output_width columns (17x16)
|
||||
tmpp = tmp;
|
||||
for (i = 3; i > 0; i--) {
|
||||
q7u16 = vmull_u8(d2u8, d0u8);
|
||||
q8u16 = vmull_u8(d3u8, d0u8);
|
||||
q9u16 = vmull_u8(d5u8, d0u8);
|
||||
q10u16 = vmull_u8(d6u8, d0u8);
|
||||
q11u16 = vmull_u8(d8u8, d0u8);
|
||||
q12u16 = vmull_u8(d9u8, d0u8);
|
||||
q13u16 = vmull_u8(d11u8, d0u8);
|
||||
q14u16 = vmull_u8(d12u8, d0u8);
|
||||
|
||||
d2u8 = vext_u8(d2u8, d3u8, 1);
|
||||
d5u8 = vext_u8(d5u8, d6u8, 1);
|
||||
d8u8 = vext_u8(d8u8, d9u8, 1);
|
||||
d11u8 = vext_u8(d11u8, d12u8, 1);
|
||||
|
||||
q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
|
||||
q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
|
||||
q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
|
||||
|
||||
d3u8 = vext_u8(d3u8, d4u8, 1);
|
||||
d6u8 = vext_u8(d6u8, d7u8, 1);
|
||||
d9u8 = vext_u8(d9u8, d10u8, 1);
|
||||
d12u8 = vext_u8(d12u8, d13u8, 1);
|
||||
|
||||
q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
|
||||
q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
|
||||
q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
|
||||
q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
|
||||
|
||||
d14u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d15u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d16u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d17u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
d18u8 = vqrshrn_n_u16(q11u16, 7);
|
||||
d19u8 = vqrshrn_n_u16(q12u16, 7);
|
||||
d20u8 = vqrshrn_n_u16(q13u16, 7);
|
||||
d21u8 = vqrshrn_n_u16(q14u16, 7);
|
||||
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
d3u8 = vld1_u8(src_ptr + 8);
|
||||
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr);
|
||||
d6u8 = vld1_u8(src_ptr + 8);
|
||||
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d8u8 = vld1_u8(src_ptr);
|
||||
d9u8 = vld1_u8(src_ptr + 8);
|
||||
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d11u8 = vld1_u8(src_ptr);
|
||||
d12u8 = vld1_u8(src_ptr + 8);
|
||||
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
q7u8 = vcombine_u8(d14u8, d15u8);
|
||||
q8u8 = vcombine_u8(d16u8, d17u8);
|
||||
q9u8 = vcombine_u8(d18u8, d19u8);
|
||||
q10u8 = vcombine_u8(d20u8, d21u8);
|
||||
|
||||
vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16;
|
||||
}
|
||||
|
||||
// First-pass filtering for rest 5 lines
|
||||
d14u8 = vld1_u8(src_ptr);
|
||||
d15u8 = vld1_u8(src_ptr + 8);
|
||||
d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
q9u16 = vmull_u8(d2u8, d0u8);
|
||||
q10u16 = vmull_u8(d3u8, d0u8);
|
||||
q11u16 = vmull_u8(d5u8, d0u8);
|
||||
q12u16 = vmull_u8(d6u8, d0u8);
|
||||
q13u16 = vmull_u8(d8u8, d0u8);
|
||||
q14u16 = vmull_u8(d9u8, d0u8);
|
||||
|
||||
d2u8 = vext_u8(d2u8, d3u8, 1);
|
||||
d5u8 = vext_u8(d5u8, d6u8, 1);
|
||||
d8u8 = vext_u8(d8u8, d9u8, 1);
|
||||
|
||||
q9u16 = vmlal_u8(q9u16, d2u8, d1u8);
|
||||
q11u16 = vmlal_u8(q11u16, d5u8, d1u8);
|
||||
q13u16 = vmlal_u8(q13u16, d8u8, d1u8);
|
||||
|
||||
d3u8 = vext_u8(d3u8, d4u8, 1);
|
||||
d6u8 = vext_u8(d6u8, d7u8, 1);
|
||||
d9u8 = vext_u8(d9u8, d10u8, 1);
|
||||
|
||||
q10u16 = vmlal_u8(q10u16, d3u8, d1u8);
|
||||
q12u16 = vmlal_u8(q12u16, d6u8, d1u8);
|
||||
q14u16 = vmlal_u8(q14u16, d9u8, d1u8);
|
||||
|
||||
q1u16 = vmull_u8(d11u8, d0u8);
|
||||
q2u16 = vmull_u8(d12u8, d0u8);
|
||||
q3u16 = vmull_u8(d14u8, d0u8);
|
||||
q4u16 = vmull_u8(d15u8, d0u8);
|
||||
|
||||
d11u8 = vext_u8(d11u8, d12u8, 1);
|
||||
d14u8 = vext_u8(d14u8, d15u8, 1);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, d11u8, d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, d14u8, d1u8);
|
||||
|
||||
d12u8 = vext_u8(d12u8, d13u8, 1);
|
||||
d15u8 = vext_u8(d15u8, d16u8, 1);
|
||||
|
||||
q2u16 = vmlal_u8(q2u16, d12u8, d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, d15u8, d1u8);
|
||||
|
||||
d10u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d11u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
d12u8 = vqrshrn_n_u16(q11u16, 7);
|
||||
d13u8 = vqrshrn_n_u16(q12u16, 7);
|
||||
d14u8 = vqrshrn_n_u16(q13u16, 7);
|
||||
d15u8 = vqrshrn_n_u16(q14u16, 7);
|
||||
d16u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d17u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d18u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d19u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
|
||||
q5u8 = vcombine_u8(d10u8, d11u8);
|
||||
q6u8 = vcombine_u8(d12u8, d13u8);
|
||||
q7u8 = vcombine_u8(d14u8, d15u8);
|
||||
q8u8 = vcombine_u8(d16u8, d17u8);
|
||||
q9u8 = vcombine_u8(d18u8, d19u8);
|
||||
|
||||
vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q9u8);
|
||||
|
||||
// secondpass_filter
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
tmpp = tmp;
|
||||
q11u8 = vld1q_u8(tmpp);
|
||||
tmpp += 16;
|
||||
for (i = 4; i > 0; i--) {
|
||||
q12u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
q13u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
q14u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
q15u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
|
||||
q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
|
||||
q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
|
||||
q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
|
||||
q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
|
||||
q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
|
||||
q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
|
||||
q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
|
||||
q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d4u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d5u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
d6u8 = vqrshrn_n_u16(q5u16, 7);
|
||||
d7u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d8u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d9u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
|
||||
q1u8 = vcombine_u8(d2u8, d3u8);
|
||||
q2u8 = vcombine_u8(d4u8, d5u8);
|
||||
q3u8 = vcombine_u8(d6u8, d7u8);
|
||||
q4u8 = vcombine_u8(d8u8, d9u8);
|
||||
|
||||
q11u8 = q15u8;
|
||||
|
||||
vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_copy_mem8x4_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 8; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem16x16_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
uint8x16_t qtmp;
|
||||
|
||||
for (r = 0; r < 16; r++) {
|
||||
qtmp = vld1q_u8(src);
|
||||
vst1q_u8(dst, qtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_dc_only_idct_add_neon(
|
||||
int16_t input_dc,
|
||||
unsigned char *pred_ptr,
|
||||
int pred_stride,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int i;
|
||||
uint16_t a1 = ((input_dc + 4) >> 3);
|
||||
uint32x2_t d2u32 = vdup_n_u32(0);
|
||||
uint8x8_t d2u8;
|
||||
uint16x8_t q1u16;
|
||||
uint16x8_t qAdd;
|
||||
|
||||
qAdd = vdupq_n_u16(a1);
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
|
||||
pred_ptr += pred_stride;
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
|
||||
pred_ptr += pred_stride;
|
||||
|
||||
q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
|
||||
dst_ptr += dst_stride;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
|
@ -1,142 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
|
||||
void vp8_dequant_idct_add_neon(
|
||||
int16_t *input,
|
||||
int16_t *dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int32x2_t d14, d15;
|
||||
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
|
||||
int16x8_t q1, q2, q3, q4, q5, q6;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x2x2_t d2tmp0, d2tmp1;
|
||||
int16x4x2_t d2tmp2, d2tmp3;
|
||||
|
||||
d14 = d15 = vdup_n_s32(0);
|
||||
|
||||
// load input
|
||||
q3 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
input += 8;
|
||||
q4 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
|
||||
// load dq
|
||||
q5 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q6 = vld1q_s16(dq);
|
||||
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
|
||||
|
||||
q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
|
||||
vreinterpretq_u16_s16(q5)));
|
||||
q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
|
||||
vreinterpretq_u16_s16(q6)));
|
||||
|
||||
d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
|
||||
q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
|
||||
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
|
||||
// loop 2
|
||||
q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
|
||||
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
|
||||
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2 = vrshr_n_s16(d2, 3);
|
||||
d3 = vrshr_n_s16(d3, 3);
|
||||
d4 = vrshr_n_s16(d4, 3);
|
||||
d5 = vrshr_n_s16(d5, 3);
|
||||
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
|
||||
q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
|
||||
q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
|
||||
|
||||
q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
|
||||
vreinterpret_u8_s32(d14)));
|
||||
q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
|
||||
vreinterpret_u8_s32(d15)));
|
||||
|
||||
d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
|
||||
d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
|
||||
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 1);
|
||||
return;
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
|
||||
int16x8x2_t qQ, qDQC, qDQ;
|
||||
|
||||
qQ = vld2q_s16(d->qcoeff);
|
||||
qDQC = vld2q_s16(DQC);
|
||||
|
||||
qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
|
||||
qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
|
||||
|
||||
vst2q_s16(d->dqcoeff, qDQ);
|
||||
}
|
|
@ -1,96 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
/* place these declarations here because we don't want to maintain them
|
||||
* outside of this scope
|
||||
*/
|
||||
void idct_dequant_full_2x_neon(short *q, short *dq,
|
||||
unsigned char *dst, int stride);
|
||||
void idct_dequant_0_2x_neon(short *q, short dq,
|
||||
unsigned char *dst, int stride);
|
||||
|
||||
|
||||
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
|
||||
unsigned char *dst,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dst, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dst, stride);
|
||||
}
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
|
||||
}
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (((short *)(eobs))[2])
|
||||
{
|
||||
if (((short *)eobs)[2] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
|
||||
if (((short *)(eobs))[3])
|
||||
{
|
||||
if (((short *)eobs)[3] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
|
||||
}
|
||||
}
|
|
@ -1,63 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void idct_dequant_0_2x_neon(
|
||||
int16_t *q,
|
||||
int16_t dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int i, a0, a1;
|
||||
int16x8x2_t q2Add;
|
||||
int32x2_t d2s32 = vdup_n_s32(0),
|
||||
d4s32 = vdup_n_s32(0);
|
||||
uint8x8_t d2u8, d4u8;
|
||||
uint16x8_t q1u16, q2u16;
|
||||
|
||||
a0 = ((q[0] * dq) + 4) >> 3;
|
||||
a1 = ((q[16] * dq) + 4) >> 3;
|
||||
q[0] = q[16] = 0;
|
||||
q2Add.val[0] = vdupq_n_s16((int16_t)a0);
|
||||
q2Add.val[1] = vdupq_n_s16((int16_t)a1);
|
||||
|
||||
for (i = 0; i < 2; i++, dst += 4) {
|
||||
dst0 = dst;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
|
||||
|
||||
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d2s32));
|
||||
q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d4s32));
|
||||
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
|
||||
|
||||
d2s32 = vreinterpret_s32_u8(d2u8);
|
||||
d4s32 = vreinterpret_s32_u8(d4u8);
|
||||
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 1);
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -1,185 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 17734;
|
||||
// because the lowest bit in 0x8a8c is 0, we can pre-shift this
|
||||
|
||||
void idct_dequant_full_2x_neon(
|
||||
int16_t *q,
|
||||
int16_t *dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0, *dst1;
|
||||
int32x2_t d28, d29, d30, d31;
|
||||
int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x4x2_t q2tmp0, q2tmp1;
|
||||
int16x8x2_t q2tmp2, q2tmp3;
|
||||
int16x4_t dLow0, dLow1, dHigh0, dHigh1;
|
||||
|
||||
d28 = d29 = d30 = d31 = vdup_n_s32(0);
|
||||
|
||||
// load dq
|
||||
q0 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q1 = vld1q_s16(dq);
|
||||
|
||||
// load q
|
||||
q2 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q3 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q4 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q5 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
|
||||
d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
|
||||
d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
|
||||
|
||||
q2 = vmulq_s16(q2, q0);
|
||||
q3 = vmulq_s16(q3, q1);
|
||||
q4 = vmulq_s16(q4, q0);
|
||||
q5 = vmulq_s16(q5, q1);
|
||||
|
||||
// vswp
|
||||
dLow0 = vget_low_s16(q2);
|
||||
dHigh0 = vget_high_s16(q2);
|
||||
dLow1 = vget_low_s16(q4);
|
||||
dHigh1 = vget_high_s16(q4);
|
||||
q2 = vcombine_s16(dLow0, dLow1);
|
||||
q4 = vcombine_s16(dHigh0, dHigh1);
|
||||
|
||||
dLow0 = vget_low_s16(q3);
|
||||
dHigh0 = vget_high_s16(q3);
|
||||
dLow1 = vget_low_s16(q5);
|
||||
dHigh1 = vget_high_s16(q5);
|
||||
q3 = vcombine_s16(dLow0, dLow1);
|
||||
q5 = vcombine_s16(dHigh0, dHigh1);
|
||||
|
||||
q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
|
||||
q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
|
||||
q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
|
||||
q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
|
||||
|
||||
q10 = vqaddq_s16(q2, q3);
|
||||
q11 = vqsubq_s16(q2, q3);
|
||||
|
||||
q8 = vshrq_n_s16(q8, 1);
|
||||
q9 = vshrq_n_s16(q9, 1);
|
||||
|
||||
q4 = vqaddq_s16(q4, q8);
|
||||
q5 = vqaddq_s16(q5, q9);
|
||||
|
||||
q2 = vqsubq_s16(q6, q5);
|
||||
q3 = vqaddq_s16(q7, q4);
|
||||
|
||||
q4 = vqaddq_s16(q10, q3);
|
||||
q5 = vqaddq_s16(q11, q2);
|
||||
q6 = vqsubq_s16(q11, q2);
|
||||
q7 = vqsubq_s16(q10, q3);
|
||||
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
|
||||
// loop 2
|
||||
q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
|
||||
q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
|
||||
q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
|
||||
q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
|
||||
|
||||
q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
|
||||
q10 = vshrq_n_s16(q10, 1);
|
||||
q11 = vshrq_n_s16(q11, 1);
|
||||
|
||||
q10 = vqaddq_s16(q2tmp2.val[1], q10);
|
||||
q11 = vqaddq_s16(q2tmp3.val[1], q11);
|
||||
|
||||
q8 = vqsubq_s16(q8, q11);
|
||||
q9 = vqaddq_s16(q9, q10);
|
||||
|
||||
q4 = vqaddq_s16(q2, q9);
|
||||
q5 = vqaddq_s16(q3, q8);
|
||||
q6 = vqsubq_s16(q3, q8);
|
||||
q7 = vqsubq_s16(q2, q9);
|
||||
|
||||
q4 = vrshrq_n_s16(q4, 3);
|
||||
q5 = vrshrq_n_s16(q5, 3);
|
||||
q6 = vrshrq_n_s16(q6, 3);
|
||||
q7 = vrshrq_n_s16(q7, 3);
|
||||
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
|
||||
q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
|
||||
vreinterpret_u8_s32(d28)));
|
||||
q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
|
||||
vreinterpret_u8_s32(d29)));
|
||||
q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
|
||||
vreinterpret_u8_s32(d30)));
|
||||
q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
|
||||
vreinterpret_u8_s32(d31)));
|
||||
|
||||
d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
|
||||
d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
|
||||
d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
|
||||
d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
|
||||
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
vst1_lane_s32((int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
|
||||
vst1_lane_s32((int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d31, 0);
|
||||
vst1_lane_s32((int32_t *)dst1, d31, 1);
|
||||
return;
|
||||
}
|
|
@ -1,102 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_short_inv_walsh4x4_neon(
|
||||
int16_t *input,
|
||||
int16_t *mb_dqcoeff) {
|
||||
int16x8_t q0s16, q1s16, q2s16, q3s16;
|
||||
int16x4_t d4s16, d5s16, d6s16, d7s16;
|
||||
int16x4x2_t v2tmp0, v2tmp1;
|
||||
int32x2x2_t v2tmp2, v2tmp3;
|
||||
int16x8_t qAdd3;
|
||||
|
||||
q0s16 = vld1q_s16(input);
|
||||
q1s16 = vld1q_s16(input + 8);
|
||||
|
||||
// 1st for loop
|
||||
d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
|
||||
v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_low_s16(q1s16)));
|
||||
v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_high_s16(q1s16)));
|
||||
v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[0]));
|
||||
v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[1]));
|
||||
|
||||
// 2nd for loop
|
||||
d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
|
||||
qAdd3 = vdupq_n_s16(3);
|
||||
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
|
||||
q0s16 = vaddq_s16(q0s16, qAdd3);
|
||||
q1s16 = vaddq_s16(q1s16, qAdd3);
|
||||
|
||||
q0s16 = vshrq_n_s16(q0s16, 3);
|
||||
q1s16 = vshrq_n_s16(q1s16, 3);
|
||||
|
||||
// store
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
return;
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
|
||||
static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit) {
|
||||
uint8_t *sp;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q3s16, q13s16;
|
||||
int8x8_t d8s8, d9s8;
|
||||
int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
|
||||
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
|
||||
sp = s - (p << 1);
|
||||
q5u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q6u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q7u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q8u8 = vld1q_u8(sp);
|
||||
|
||||
q15u8 = vabdq_u8(q6u8, q7u8);
|
||||
q14u8 = vabdq_u8(q5u8, q8u8);
|
||||
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q13s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
q7u8 = veorq_u8(q7u8, q0u8);
|
||||
q8u8 = veorq_u8(q8u8, q0u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
|
||||
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
|
||||
vreinterpretq_s8_u8(q8u8));
|
||||
|
||||
q2s16 = vmulq_s16(q2s16, q13s16);
|
||||
q3s16 = vmulq_s16(q3s16, q13s16);
|
||||
|
||||
q10u8 = vdupq_n_u8(3);
|
||||
q9u8 = vdupq_n_u8(4);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
|
||||
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
|
||||
|
||||
d8s8 = vqmovn_s16(q2s16);
|
||||
d9s8 = vqmovn_s16(q3s16);
|
||||
q4s8 = vcombine_s8(d8s8, d9s8);
|
||||
|
||||
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q3s8 = vshrq_n_s8(q3s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
|
||||
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
|
||||
vst1q_u8(s, q7u8);
|
||||
s -= p;
|
||||
vst1q_u8(s, q6u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
|
@ -1,283 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE void write_2x4(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result) {
|
||||
/*
|
||||
* uint8x8x2_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 02 12 | 04 14 06 16
|
||||
01 11 03 13 | 05 15 07 17
|
||||
*/
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0],
|
||||
result.val[1]);
|
||||
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
|
||||
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
|
||||
}
|
||||
|
||||
static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result,
|
||||
const uint8x8x2_t result2) {
|
||||
write_2x4(dst, pitch, result);
|
||||
dst += pitch * 8;
|
||||
write_2x4(dst, pitch, result2);
|
||||
}
|
||||
#else
|
||||
static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result,
|
||||
const uint8x8x2_t result2) {
|
||||
vst2_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 7);
|
||||
dst += pitch;
|
||||
|
||||
vst2_lane_u8(dst, result2, 0);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 1);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 2);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 3);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 4);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 5);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 6);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 7);
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
const uint8x8_t a = vld1_u8(src);
|
||||
const uint8x8_t b = vld1_u8(src + pitch * 1);
|
||||
const uint8x8_t c = vld1_u8(src + pitch * 2);
|
||||
const uint8x8_t d = vld1_u8(src + pitch * 3);
|
||||
const uint8x8_t e = vld1_u8(src + pitch * 4);
|
||||
const uint8x8_t f = vld1_u8(src + pitch * 5);
|
||||
const uint8x8_t g = vld1_u8(src + pitch * 6);
|
||||
const uint8x8_t h = vld1_u8(src + pitch * 7);
|
||||
const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a),
|
||||
vreinterpret_u32_u8(e));
|
||||
const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b),
|
||||
vreinterpret_u32_u8(f));
|
||||
const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c),
|
||||
vreinterpret_u32_u8(g));
|
||||
const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d),
|
||||
vreinterpret_u32_u8(h));
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
|
||||
vreinterpret_u16_u32(r26_u32.val[0]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
|
||||
vreinterpret_u16_u32(r37_u32.val[0]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
/*
|
||||
* after vtrn_u32
|
||||
00 01 02 03 | 40 41 42 43
|
||||
10 11 12 13 | 50 51 52 53
|
||||
20 21 22 23 | 60 61 62 63
|
||||
30 31 32 33 | 70 71 72 73
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 40 41 60 61
|
||||
02 03 22 23 | 42 43 62 63
|
||||
10 11 30 31 | 50 51 70 71
|
||||
12 13 32 33 | 52 52 72 73
|
||||
|
||||
00 01 20 21 | 40 41 60 61
|
||||
10 11 30 31 | 50 51 70 71
|
||||
02 03 22 23 | 42 43 62 63
|
||||
12 13 32 33 | 52 52 72 73
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 40 50 60 70
|
||||
01 11 21 31 | 41 51 61 71
|
||||
02 12 22 32 | 42 52 62 72
|
||||
03 13 23 33 | 43 53 63 73
|
||||
*/
|
||||
x.val[0] = r01_u8.val[0];
|
||||
x.val[1] = r01_u8.val[1];
|
||||
x.val[2] = r23_u8.val[0];
|
||||
x.val[3] = r23_u8.val[1];
|
||||
|
||||
return x;
|
||||
}
|
||||
#else
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
|
||||
x = vld4_lane_u8(src, x, 0);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 1);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 2);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 3);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 4);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 5);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 6);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 7);
|
||||
return x;
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit) {
|
||||
unsigned char *src1;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q13s16, q11s16;
|
||||
int8x8_t d28s8, d29s8;
|
||||
int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
|
||||
uint8x8x4_t d0u8x4; // d6, d7, d8, d9
|
||||
uint8x8x4_t d1u8x4; // d10, d11, d12, d13
|
||||
uint8x8x2_t d2u8x2; // d12, d13
|
||||
uint8x8x2_t d3u8x2; // d14, d15
|
||||
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
|
||||
src1 = s - 2;
|
||||
d0u8x4 = read_4x8(src1, p);
|
||||
src1 += p * 8;
|
||||
d1u8x4 = read_4x8(src1, p);
|
||||
|
||||
q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10
|
||||
q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12
|
||||
q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11
|
||||
q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13
|
||||
|
||||
q15u8 = vabdq_u8(q5u8, q4u8);
|
||||
q14u8 = vabdq_u8(q3u8, q6u8);
|
||||
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q11s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
|
||||
q3u8 = veorq_u8(q3u8, q0u8);
|
||||
q4u8 = veorq_u8(q4u8, q0u8);
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
|
||||
q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8),
|
||||
vreinterpretq_s8_u8(q6u8));
|
||||
|
||||
q2s16 = vmulq_s16(q2s16, q11s16);
|
||||
q13s16 = vmulq_s16(q13s16, q11s16);
|
||||
|
||||
q11u8 = vdupq_n_u8(3);
|
||||
q12u8 = vdupq_n_u8(4);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
|
||||
q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
|
||||
|
||||
d28s8 = vqmovn_s16(q2s16);
|
||||
d29s8 = vqmovn_s16(q13s16);
|
||||
q14s8 = vcombine_s8(d28s8, d29s8);
|
||||
|
||||
q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q14s8 = vshrq_n_s8(q3s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
|
||||
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
|
||||
d2u8x2.val[0] = vget_low_u8(q6u8); // d12
|
||||
d2u8x2.val[1] = vget_low_u8(q7u8); // d14
|
||||
d3u8x2.val[0] = vget_high_u8(q6u8); // d13
|
||||
d3u8x2.val[1] = vget_high_u8(q7u8); // d15
|
||||
|
||||
src1 = s - 1;
|
||||
write_2x8(src1, p, d2u8x2, d3u8x2);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
|
@ -1,625 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
|
||||
static INLINE void vp8_mbloop_filter_neon(
|
||||
uint8x16_t qblimit, // mblimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p2
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q4r, // p1
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r, // q1
|
||||
uint8x16_t *q9r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16;
|
||||
int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8;
|
||||
uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16;
|
||||
int8x16_t q0s8, q12s8, q14s8, q15s8;
|
||||
int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29;
|
||||
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q1u8 = vabdq_u8(q9, q8);
|
||||
q0u8 = vabdq_u8(q10, q9);
|
||||
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q1u8 = vmaxq_u8(q1u8, q0u8);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
|
||||
q12u8 = vabdq_u8(q6, q7);
|
||||
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q1u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
|
||||
q1u8 = vabdq_u8(q5, q8);
|
||||
q12u8 = vqaddq_u8(q12u8, q12u8);
|
||||
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q9 = veorq_u8(q9, q0u8);
|
||||
q8 = veorq_u8(q8, q0u8);
|
||||
q7 = veorq_u8(q7, q0u8);
|
||||
q6 = veorq_u8(q6, q0u8);
|
||||
q5 = veorq_u8(q5, q0u8);
|
||||
q4 = veorq_u8(q4, q0u8);
|
||||
|
||||
q1u8 = vshrq_n_u8(q1u8, 1);
|
||||
q12u8 = vqaddq_u8(q12u8, q1u8);
|
||||
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
q12u8 = vcgeq_u8(qblimit, q12u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
|
||||
vreinterpretq_s8_u8(q8));
|
||||
|
||||
q11s16 = vdupq_n_s16(3);
|
||||
q2s16 = vmulq_s16(q2s16, q11s16);
|
||||
q13s16 = vmulq_s16(q13s16, q11s16);
|
||||
|
||||
q15u8 = vandq_u8(q15u8, q12u8);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8));
|
||||
|
||||
q12u8 = vdupq_n_u8(3);
|
||||
q11u8 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2 = vqmovn_s16(q2s16);
|
||||
d3 = vqmovn_s16(q13s16);
|
||||
q1s8 = vcombine_s8(d2, d3);
|
||||
q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8));
|
||||
q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8));
|
||||
q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q13s8 = vshrq_n_s8(q13s8, 3);
|
||||
|
||||
q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8);
|
||||
q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8);
|
||||
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63);
|
||||
d5 = vdup_n_s8(9);
|
||||
d4 = vdup_n_s8(18);
|
||||
|
||||
q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5);
|
||||
q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5);
|
||||
d5 = vdup_n_s8(27);
|
||||
q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4);
|
||||
q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4);
|
||||
q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5);
|
||||
q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5);
|
||||
|
||||
d0 = vqshrn_n_s16(q0s16 , 7);
|
||||
d1 = vqshrn_n_s16(q11s16, 7);
|
||||
d24 = vqshrn_n_s16(q12s16, 7);
|
||||
d25 = vqshrn_n_s16(q13s16, 7);
|
||||
d28 = vqshrn_n_s16(q14s16, 7);
|
||||
d29 = vqshrn_n_s16(q15s16, 7);
|
||||
|
||||
q0s8 = vcombine_s8(d0, d1);
|
||||
q12s8 = vcombine_s8(d24, d25);
|
||||
q14s8 = vcombine_s8(d28, d29);
|
||||
|
||||
q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8);
|
||||
q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8);
|
||||
q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8);
|
||||
q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8);
|
||||
q15s8 = vqsubq_s8((q7s8), q14s8);
|
||||
q14s8 = vqaddq_s8((q6s8), q14s8);
|
||||
|
||||
q1u8 = vdupq_n_u8(0x80);
|
||||
*q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8);
|
||||
*q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
src -= (pitch << 2);
|
||||
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
src -= (pitch * 6);
|
||||
vst1q_u8(src, q4);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q9);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
u -= (pitch * 6);
|
||||
v -= (pitch * 6);
|
||||
vst1_u8(u, vget_low_u8(q4));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q4));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q9));
|
||||
vst1_u8(v, vget_high_u8(q9));
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s1, *s2;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
s1 = src - 4;
|
||||
s2 = s1 + 8 * pitch;
|
||||
d6 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d7 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d8 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d9 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d10 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d11 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d12 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d13 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d14 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d15 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d16 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d17 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d18 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d19 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d20 = vld1_u8(s1);
|
||||
d21 = vld1_u8(s2);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
s1 -= 7 * pitch;
|
||||
s2 -= 7 * pitch;
|
||||
|
||||
vst1_u8(s1, vget_low_u8(q3));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q3));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q4));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q4));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q5));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q5));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q6));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q6));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q7));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q7));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q8));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q8));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q9));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q9));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q10));
|
||||
vst1_u8(s2, vget_high_u8(q10));
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
us = u - 4;
|
||||
vs = v - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
d21 = vld1_u8(vs);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
ud = u - 4;
|
||||
vst1_u8(ud, vget_low_u8(q3));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q4));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q5));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q6));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q7));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q8));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q9));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q10));
|
||||
|
||||
vd = v - 4;
|
||||
vst1_u8(vd, vget_high_u8(q3));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q4));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q5));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q6));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q7));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q8));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q9));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q10));
|
||||
return;
|
||||
}
|
|
@ -1,123 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
|
||||
void vp8_short_idct4x4llm_neon(
|
||||
int16_t *input,
|
||||
unsigned char *pred_ptr,
|
||||
int pred_stride,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int i;
|
||||
uint32x2_t d6u32 = vdup_n_u32(0);
|
||||
uint8x8_t d1u8;
|
||||
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
|
||||
uint16x8_t q1u16;
|
||||
int16x8_t q1s16, q2s16, q3s16, q4s16;
|
||||
int32x2x2_t v2tmp0, v2tmp1;
|
||||
int16x4x2_t v2tmp2, v2tmp3;
|
||||
|
||||
d2 = vld1_s16(input);
|
||||
d3 = vld1_s16(input + 4);
|
||||
d4 = vld1_s16(input + 8);
|
||||
d5 = vld1_s16(input + 12);
|
||||
|
||||
// 1st for loop
|
||||
q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here
|
||||
q2s16 = vcombine_s16(d3, d5);
|
||||
|
||||
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
|
||||
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
|
||||
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[0]));
|
||||
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[1]));
|
||||
|
||||
// 2nd for loop
|
||||
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
|
||||
q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
|
||||
|
||||
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
|
||||
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
|
||||
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2 = vrshr_n_s16(d2, 3);
|
||||
d3 = vrshr_n_s16(d3, 3);
|
||||
d4 = vrshr_n_s16(d4, 3);
|
||||
d5 = vrshr_n_s16(d5, 3);
|
||||
|
||||
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[0]));
|
||||
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[1]));
|
||||
|
||||
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
|
||||
q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
|
||||
|
||||
// dc_only_idct_add
|
||||
for (i = 0; i < 2; i++, q1s16 = q2s16) {
|
||||
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
|
||||
pred_ptr += pred_stride;
|
||||
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
|
||||
pred_ptr += pred_stride;
|
||||
|
||||
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
|
||||
vreinterpret_u8_u32(d6u32));
|
||||
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
|
||||
dst_ptr += dst_stride;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
return;
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,550 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
static INLINE void vp8_loop_filter_neon(
|
||||
uint8x16_t qblimit, // flimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p3
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q11s16;
|
||||
uint16x8_t q4u16;
|
||||
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
|
||||
int8x8_t d2s8, d3s8;
|
||||
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q3 = vabdq_u8(q9, q8);
|
||||
q4 = vabdq_u8(q10, q9);
|
||||
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q3 = vmaxq_u8(q3, q4);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
|
||||
q9 = vabdq_u8(q6, q7);
|
||||
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q3);
|
||||
|
||||
q2u8 = vabdq_u8(q5, q8);
|
||||
q9 = vqaddq_u8(q9, q9);
|
||||
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q10 = vdupq_n_u8(0x80);
|
||||
q8 = veorq_u8(q8, q10);
|
||||
q7 = veorq_u8(q7, q10);
|
||||
q6 = veorq_u8(q6, q10);
|
||||
q5 = veorq_u8(q5, q10);
|
||||
|
||||
q2u8 = vshrq_n_u8(q2u8, 1);
|
||||
q9 = vqaddq_u8(q9, q2u8);
|
||||
|
||||
q10 = vdupq_n_u8(3);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
|
||||
q9 = vcgeq_u8(qblimit, q9);
|
||||
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
|
||||
vreinterpretq_s8_u8(q8));
|
||||
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
|
||||
q4u16 = vmovl_u8(vget_low_u8(q10));
|
||||
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
|
||||
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
|
||||
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
|
||||
q15u8 = vandq_u8(q15u8, q9);
|
||||
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
|
||||
|
||||
q9 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2s8 = vqmovn_s16(q2s16);
|
||||
d3s8 = vqmovn_s16(q11s16);
|
||||
q1s8 = vcombine_s8(d2s8, d3s8);
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
|
||||
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
|
||||
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q1s8 = vshrq_n_s8(q1s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
|
||||
|
||||
q1s8 = vrshrq_n_s8(q1s8, 1);
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
|
||||
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
|
||||
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
src -= (pitch << 2);
|
||||
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
src -= (pitch * 5);
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
u -= (pitch * 5);
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
|
||||
v -= (pitch * 5);
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
return;
|
||||
}
|
||||
|
||||
static INLINE void write_4x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x4_t result) {
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
/*
|
||||
* uint8x8x4_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
20 21 22 23 | 24 25 26 27
|
||||
30 31 32 33 | 34 35 36 37
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 04 05 24 25
|
||||
02 03 22 23 | 06 07 26 27
|
||||
10 11 30 31 | 14 15 34 35
|
||||
12 13 32 33 | 16 17 36 37
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 04 14 24 34
|
||||
01 11 21 31 | 05 15 25 35
|
||||
02 12 22 32 | 06 16 26 36
|
||||
03 13 23 33 | 07 17 27 37
|
||||
*/
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
|
||||
vreinterpret_u16_u8(result.val[2]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
|
||||
vreinterpret_u16_u8(result.val[3]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
|
||||
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
|
||||
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
|
||||
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
|
||||
#else
|
||||
vst4_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 7);
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s, *d;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
s = src - 4;
|
||||
d6 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d8 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d10 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d12 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d14 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d16 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d18 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d20 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d7 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d9 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d11 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d13 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d15 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d17 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d19 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d21 = vld1_u8(s);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
|
||||
d = src - 2;
|
||||
write_4x8(d, pitch, q4ResultL);
|
||||
d += pitch * 8;
|
||||
write_4x8(d, pitch, q4ResultH);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
us = u - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
|
||||
vs = v - 4;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d21 = vld1_u8(vs);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
ud = u - 2;
|
||||
write_4x8(ud, pitch, q4ResultL);
|
||||
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
vd = v - 2;
|
||||
write_4x8(vd, pitch, q4ResultH);
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
const unsigned char vp8_block2left[25] =
|
||||
{
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
|
||||
};
|
||||
const unsigned char vp8_block2above[25] =
|
||||
{
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
|
||||
};
|
|
@ -1,312 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_BLOCKD_H_
|
||||
#define VP8_COMMON_BLOCKD_H_
|
||||
|
||||
void vpx_log(const char *format, ...);
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "mv.h"
|
||||
#include "treecoder.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*#define DCPRED 1*/
|
||||
#define DCPREDSIMTHRESH 0
|
||||
#define DCPREDCNTTHRESH 3
|
||||
|
||||
#define MB_FEATURE_TREE_PROBS 3
|
||||
#define MAX_MB_SEGMENTS 4
|
||||
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 4
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_DELTADATA 0
|
||||
#define SEGMENT_ABSDATA 1
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int r, c;
|
||||
} POS;
|
||||
|
||||
#define PLANE_TYPE_Y_NO_DC 0
|
||||
#define PLANE_TYPE_Y2 1
|
||||
#define PLANE_TYPE_UV 2
|
||||
#define PLANE_TYPE_Y_WITH_DC 3
|
||||
|
||||
|
||||
typedef char ENTROPY_CONTEXT;
|
||||
typedef struct
|
||||
{
|
||||
ENTROPY_CONTEXT y1[4];
|
||||
ENTROPY_CONTEXT u[2];
|
||||
ENTROPY_CONTEXT v[2];
|
||||
ENTROPY_CONTEXT y2;
|
||||
} ENTROPY_CONTEXT_PLANES;
|
||||
|
||||
extern const unsigned char vp8_block2left[25];
|
||||
extern const unsigned char vp8_block2above[25];
|
||||
|
||||
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
|
||||
Dest = (A)+(B);
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
KEY_FRAME = 0,
|
||||
INTER_FRAME = 1
|
||||
} FRAME_TYPE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DC_PRED, /* average of above and left pixels */
|
||||
V_PRED, /* vertical prediction */
|
||||
H_PRED, /* horizontal prediction */
|
||||
TM_PRED, /* Truemotion prediction */
|
||||
B_PRED, /* block based prediction, each block has its own prediction mode */
|
||||
|
||||
NEARESTMV,
|
||||
NEARMV,
|
||||
ZEROMV,
|
||||
NEWMV,
|
||||
SPLITMV,
|
||||
|
||||
MB_MODE_COUNT
|
||||
} MB_PREDICTION_MODE;
|
||||
|
||||
/* Macroblock level features */
|
||||
typedef enum
|
||||
{
|
||||
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
|
||||
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
|
||||
MB_LVL_MAX = 2 /* Number of MB level features supported */
|
||||
|
||||
} MB_LVL_FEATURES;
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_ALTQ 0x01
|
||||
#define SEGMENT_ALT_LF 0x02
|
||||
|
||||
#define VP8_YMODES (B_PRED + 1)
|
||||
#define VP8_UV_MODES (TM_PRED + 1)
|
||||
|
||||
#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
|
||||
|
||||
typedef enum
|
||||
{
|
||||
B_DC_PRED, /* average of above and left pixels */
|
||||
B_TM_PRED,
|
||||
|
||||
B_VE_PRED, /* vertical prediction */
|
||||
B_HE_PRED, /* horizontal prediction */
|
||||
|
||||
B_LD_PRED,
|
||||
B_RD_PRED,
|
||||
|
||||
B_VR_PRED,
|
||||
B_VL_PRED,
|
||||
B_HD_PRED,
|
||||
B_HU_PRED,
|
||||
|
||||
LEFT4X4,
|
||||
ABOVE4X4,
|
||||
ZERO4X4,
|
||||
NEW4X4,
|
||||
|
||||
B_MODE_COUNT
|
||||
} B_PREDICTION_MODE;
|
||||
|
||||
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
|
||||
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
|
||||
|
||||
/* For keyframes, intra block modes are predicted by the (already decoded)
|
||||
modes for the Y blocks to the left and above us; for interframes, there
|
||||
is a single probability table. */
|
||||
|
||||
union b_mode_info
|
||||
{
|
||||
B_PREDICTION_MODE as_mode;
|
||||
int_mv mv;
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
INTRA_FRAME = 0,
|
||||
LAST_FRAME = 1,
|
||||
GOLDEN_FRAME = 2,
|
||||
ALTREF_FRAME = 3,
|
||||
MAX_REF_FRAMES = 4
|
||||
} MV_REFERENCE_FRAME;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t mode, uv_mode;
|
||||
uint8_t ref_frame;
|
||||
uint8_t is_4x4;
|
||||
int_mv mv;
|
||||
|
||||
uint8_t partitioning;
|
||||
uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
uint8_t need_to_clamp_mvs;
|
||||
uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */
|
||||
} MB_MODE_INFO;
|
||||
|
||||
typedef struct modeinfo
|
||||
{
|
||||
MB_MODE_INFO mbmi;
|
||||
union b_mode_info bmi[16];
|
||||
} MODE_INFO;
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* The mb-level information needed to be stored for higher-resolution encoder */
|
||||
typedef struct
|
||||
{
|
||||
MB_PREDICTION_MODE mode;
|
||||
MV_REFERENCE_FRAME ref_frame;
|
||||
int_mv mv;
|
||||
int dissim; /* dissimilarity level of the macroblock */
|
||||
} LOWER_RES_MB_INFO;
|
||||
|
||||
/* The frame-level information needed to be stored for higher-resolution
|
||||
* encoder */
|
||||
typedef struct
|
||||
{
|
||||
FRAME_TYPE frame_type;
|
||||
int is_frame_dropped;
|
||||
// The frame rate for the lowest resolution.
|
||||
double low_res_framerate;
|
||||
/* The frame number of each reference frames */
|
||||
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
|
||||
// The video frame counter value for the key frame, for lowest resolution.
|
||||
unsigned int key_frame_counter_value;
|
||||
LOWER_RES_MB_INFO *mb_info;
|
||||
} LOWER_RES_FRAME_INFO;
|
||||
#endif
|
||||
|
||||
typedef struct blockd
|
||||
{
|
||||
short *qcoeff;
|
||||
short *dqcoeff;
|
||||
unsigned char *predictor;
|
||||
short *dequant;
|
||||
|
||||
int offset;
|
||||
char *eob;
|
||||
|
||||
union b_mode_info bmi;
|
||||
} BLOCKD;
|
||||
|
||||
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
typedef struct macroblockd
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
|
||||
DECLARE_ALIGNED(16, short, dequant_y1[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y2[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_uv[16]);
|
||||
|
||||
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
|
||||
BLOCKD block[25];
|
||||
int fullpixel_mask;
|
||||
|
||||
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
|
||||
YV12_BUFFER_CONFIG dst;
|
||||
|
||||
MODE_INFO *mode_info_context;
|
||||
int mode_info_stride;
|
||||
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int up_available;
|
||||
int left_available;
|
||||
|
||||
unsigned char *recon_above[3];
|
||||
unsigned char *recon_left[3];
|
||||
int recon_left_stride[2];
|
||||
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context;
|
||||
ENTROPY_CONTEXT_PLANES *left_context;
|
||||
|
||||
/* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
|
||||
unsigned char segmentation_enabled;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
|
||||
unsigned char update_mb_segmentation_map;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char update_mb_segmentation_data;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char mb_segement_abs_delta;
|
||||
|
||||
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
|
||||
/* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
|
||||
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
|
||||
|
||||
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
|
||||
|
||||
/* mode_based Loop filter adjustment */
|
||||
unsigned char mode_ref_lf_delta_enabled;
|
||||
unsigned char mode_ref_lf_delta_update;
|
||||
|
||||
/* Delta values have the range +/- MAX_LOOP_FILTER */
|
||||
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
|
||||
/* Distance of MB away from frame edges */
|
||||
int mb_to_left_edge;
|
||||
int mb_to_right_edge;
|
||||
int mb_to_top_edge;
|
||||
int mb_to_bottom_edge;
|
||||
|
||||
|
||||
|
||||
vp8_subpix_fn_t subpixel_predict;
|
||||
vp8_subpix_fn_t subpixel_predict8x4;
|
||||
vp8_subpix_fn_t subpixel_predict8x8;
|
||||
vp8_subpix_fn_t subpixel_predict16x16;
|
||||
|
||||
void *current_bc;
|
||||
|
||||
int corrupted;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
/* This is an intermediate buffer currently used in sub-pixel motion search
|
||||
* to keep a copy of the reference area. This buffer can be used for other
|
||||
* purpose.
|
||||
*/
|
||||
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
|
||||
#endif
|
||||
} MACROBLOCKD;
|
||||
|
||||
|
||||
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
|
||||
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_BLOCKD_H_
|
|
@ -1,197 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_COEFUPDATEPROBS_H_
|
||||
#define VP8_COMMON_COEFUPDATEPROBS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Update probabilities for the nodes in the token entropy tree.
|
||||
Generated file included by entropy.c */
|
||||
|
||||
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
|
||||
{
|
||||
{
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
|
||||
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_COEFUPDATEPROBS_H_
|
|
@ -1,48 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_COMMON_H_
|
||||
#define VP8_COMMON_COMMON_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Interface header for common constant data structures and lookup tables */
|
||||
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Only need this for fixed-size arrays, for structs just assign. */
|
||||
|
||||
#define vp8_copy( Dest, Src) { \
|
||||
assert( sizeof( Dest) == sizeof( Src)); \
|
||||
memcpy( Dest, Src, sizeof( Src)); \
|
||||
}
|
||||
|
||||
/* Use this for variably-sized arrays. */
|
||||
|
||||
#define vp8_copy_array( Dest, Src, N) { \
|
||||
assert( sizeof( *Dest) == sizeof( *Src)); \
|
||||
memcpy( Dest, Src, N * sizeof( *Src)); \
|
||||
}
|
||||
|
||||
#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));
|
||||
|
||||
#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_COMMON_H_
|
|
@ -1,399 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropy.h"
|
||||
|
||||
/* *** GENERATED FILE: DO NOT EDIT *** */
|
||||
|
||||
#if 0
|
||||
int Contexts[vp8_coef_counter_dimen];
|
||||
|
||||
const int default_contexts[vp8_coef_counter_dimen] =
|
||||
{
|
||||
{
|
||||
// Block Type ( 0 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
|
||||
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
|
||||
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
|
||||
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
|
||||
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
|
||||
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
|
||||
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
|
||||
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
|
||||
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 1 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
|
||||
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
|
||||
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
|
||||
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
|
||||
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
|
||||
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
|
||||
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
|
||||
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
|
||||
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
|
||||
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
|
||||
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
|
||||
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
|
||||
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 2 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
|
||||
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
|
||||
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
|
||||
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
|
||||
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
|
||||
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
|
||||
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
|
||||
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
|
||||
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
|
||||
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 3 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
|
||||
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
|
||||
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
|
||||
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
|
||||
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
|
||||
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
|
||||
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
|
||||
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
|
||||
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
|
||||
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
|
||||
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
|
||||
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
|
||||
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
|
||||
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
|
||||
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
//Update probabilities for the nodes in the token entropy tree.
|
||||
const vp8_prob tree_update_probs[vp8_coef_tree_dimen] =
|
||||
{
|
||||
{
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
|
||||
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
};
|
||||
#endif
|
|
@ -1,32 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
/* Copy 2 macroblocks to a buffer */
|
||||
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
|
||||
unsigned char *dst_ptr, int dst_stride,
|
||||
int height)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < height; r++)
|
||||
{
|
||||
memcpy(dst_ptr, src_ptr, 32);
|
||||
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
}
|
||||
}
|
|
@ -1,155 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include "blockd.h"
|
||||
|
||||
|
||||
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame)
|
||||
{
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_index = 0;
|
||||
FILE *mvs = fopen("mvs.stt", "a");
|
||||
|
||||
/* print out the macroblock Y modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the macroblock UV modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "UV Modes for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the block modes */
|
||||
fprintf(mvs, "Mbs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
|
||||
if (mi[mb_index].mbmi.mode == B_PRED)
|
||||
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
|
||||
else
|
||||
fprintf(mvs, "xx ");
|
||||
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the macroblock mvs */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
/* print out the block modes */
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col);
|
||||
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
fclose(mvs);
|
||||
}
|
|
@ -1,200 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
#define VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*Generated file, included by entropy.c*/
|
||||
|
||||
|
||||
static const vp8_prob default_coef_probs [BLOCK_TYPES]
|
||||
[COEF_BANDS]
|
||||
[PREV_COEF_CONTEXTS]
|
||||
[ENTROPY_NODES] =
|
||||
{
|
||||
{ /* Block Type ( 0 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
|
||||
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
|
||||
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
|
||||
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
|
||||
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
|
||||
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
|
||||
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
|
||||
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
|
||||
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
|
||||
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
|
||||
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
|
||||
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
|
||||
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 1 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
|
||||
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
|
||||
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
|
||||
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
|
||||
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
|
||||
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
|
||||
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
|
||||
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
|
||||
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
|
||||
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
|
||||
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
|
||||
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
|
||||
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
|
||||
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
|
||||
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 2 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
|
||||
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
|
||||
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
|
||||
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
|
||||
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
|
||||
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
|
||||
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
|
||||
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 3 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
|
||||
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
|
||||
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
|
||||
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
|
||||
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
|
||||
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
|
||||
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
|
||||
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
|
||||
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
|
||||
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
|
||||
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
|
||||
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
|
||||
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
|
||||
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
|
||||
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
|
||||
{
|
||||
int i;
|
||||
short *DQ = d->dqcoeff;
|
||||
short *Q = d->qcoeff;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
DQ[i] = Q[i] * DQC[i];
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_c(short *input, short *dq,
|
||||
unsigned char *dest, int stride)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
input[i] = dq[i] * input[i];
|
||||
}
|
||||
|
||||
vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
|
||||
|
||||
memset(input, 0, 32);
|
||||
|
||||
}
|
|
@ -1,188 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "entropy.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#include "coefupdateprobs.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) =
|
||||
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char,
|
||||
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
|
||||
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
|
||||
|
||||
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
{
|
||||
0, 1, 4, 8,
|
||||
5, 2, 3, 6,
|
||||
9, 12, 13, 10,
|
||||
7, 11, 14, 15,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
};
|
||||
|
||||
/* vp8_default_zig_zag_mask generated with:
|
||||
|
||||
void vp8_init_scan_order_mask()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i;
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) =
|
||||
{
|
||||
1, 2, 32, 64,
|
||||
4, 16, 128, 4096,
|
||||
8, 256, 2048, 8192,
|
||||
512, 1024, 16384, -32768
|
||||
};
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
|
||||
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
|
||||
|
||||
const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
|
||||
{
|
||||
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
|
||||
-ZERO_TOKEN, 4, /* 1 = ZERO */
|
||||
-ONE_TOKEN, 6, /* 2 = ONE */
|
||||
8, 12, /* 3 = LOW_VAL */
|
||||
-TWO_TOKEN, 10, /* 4 = TWO */
|
||||
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
|
||||
14, 16, /* 6 = HIGH_LOW */
|
||||
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
|
||||
18, 20, /* 8 = CAT_THREEFOUR */
|
||||
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
|
||||
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
|
||||
};
|
||||
|
||||
/* vp8_coef_encodings generated with:
|
||||
vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
|
||||
*/
|
||||
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
|
||||
{
|
||||
{2, 2},
|
||||
{6, 3},
|
||||
{28, 5},
|
||||
{58, 6},
|
||||
{59, 6},
|
||||
{60, 6},
|
||||
{61, 6},
|
||||
{124, 7},
|
||||
{125, 7},
|
||||
{126, 7},
|
||||
{127, 7},
|
||||
{0, 1}
|
||||
};
|
||||
|
||||
/* Trees for extra bits. Probabilities are constant and
|
||||
do not depend on previously encoded bits */
|
||||
|
||||
static const vp8_prob Pcat1[] = { 159};
|
||||
static const vp8_prob Pcat2[] = { 165, 145};
|
||||
static const vp8_prob Pcat3[] = { 173, 148, 140};
|
||||
static const vp8_prob Pcat4[] = { 176, 155, 140, 135};
|
||||
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130};
|
||||
static const vp8_prob Pcat6[] =
|
||||
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
|
||||
|
||||
|
||||
/* tree index tables generated with:
|
||||
|
||||
void init_bit_tree(vp8_tree_index *p, int n)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
while (++i < n)
|
||||
{
|
||||
p[0] = p[1] = i << 1;
|
||||
p += 2;
|
||||
}
|
||||
|
||||
p[0] = p[1] = 0;
|
||||
}
|
||||
|
||||
void init_bit_trees()
|
||||
{
|
||||
init_bit_tree(cat1, 1);
|
||||
init_bit_tree(cat2, 2);
|
||||
init_bit_tree(cat3, 3);
|
||||
init_bit_tree(cat4, 4);
|
||||
init_bit_tree(cat5, 5);
|
||||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
*/
|
||||
|
||||
static const vp8_tree_index cat1[2] = { 0, 0 };
|
||||
static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 };
|
||||
static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
|
||||
static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
|
||||
static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
|
||||
static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
|
||||
14, 14, 16, 16, 18, 18, 20, 20, 0, 0 };
|
||||
|
||||
const vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
};
|
||||
|
||||
#include "default_coef_probs.h"
|
||||
|
||||
void vp8_default_coef_probs(VP8_COMMON *pc)
|
||||
{
|
||||
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
|
||||
}
|
||||
|
|
@ -1,109 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPY_H_
|
||||
#define VP8_COMMON_ENTROPY_H_
|
||||
|
||||
#include "treecoder.h"
|
||||
#include "blockd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Coefficient token alphabet */
|
||||
|
||||
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
|
||||
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
|
||||
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
|
||||
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define MAX_ENTROPY_TOKENS 12
|
||||
#define ENTROPY_NODES 11
|
||||
|
||||
extern const vp8_tree_index vp8_coef_tree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
int Len;
|
||||
int base_val;
|
||||
} vp8_extra_bit_struct;
|
||||
|
||||
extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
|
||||
|
||||
#define PROB_UPDATE_BASELINE_COST 7
|
||||
|
||||
#define MAX_PROB 255
|
||||
#define DCT_MAX_VALUE 2048
|
||||
|
||||
|
||||
/* Coefficients are predicted via a 3-dimensional probability table. */
|
||||
|
||||
/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
|
||||
|
||||
#define BLOCK_TYPES 4
|
||||
|
||||
/* Middle dimension is a coarsening of the coefficient's
|
||||
position within the 4x4 DCT. */
|
||||
|
||||
#define COEF_BANDS 8
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
|
||||
|
||||
/* Inside dimension is 3-valued measure of nearby complexity, that is,
|
||||
the extent to which nearby coefficients are nonzero. For the first
|
||||
coefficient (DC, unless block type is 0), we look at the (already encoded)
|
||||
blocks above and to the left of the current block. The context index is
|
||||
then the number (0,1,or 2) of these blocks having nonzero coefficients.
|
||||
After decoding a coefficient, the measure is roughly the size of the
|
||||
most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1).
|
||||
Note that the intuitive meaning of this measure changes as coefficients
|
||||
are decoded, e.g., prior to the first token, a zero means that my neighbors
|
||||
are empty while, after the first token, because of the use of end-of-block,
|
||||
a zero means we just decoded a zero and hence guarantees that a non-zero
|
||||
coefficient will appear later in this block. However, this shift
|
||||
in meaning is perfectly OK because our context depends also on the
|
||||
coefficient band (and since zigzag positions 0, 1, and 2 are in
|
||||
distinct bands). */
|
||||
|
||||
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
|
||||
# define PREV_COEF_CONTEXTS 3
|
||||
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
|
||||
|
||||
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
|
||||
|
||||
struct VP8Common;
|
||||
void vp8_default_coef_probs(struct VP8Common *);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]);
|
||||
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
|
||||
|
||||
void vp8_coef_tree_initialize(void);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ENTROPY_H_
|
|
@ -1,171 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#define USE_PREBUILT_TABLES
|
||||
|
||||
#include "entropymode.h"
|
||||
#include "entropy.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#include "vp8_entropymodedata.h"
|
||||
|
||||
int vp8_mv_cont(const int_mv *l, const int_mv *a)
|
||||
{
|
||||
int lez = (l->as_int == 0);
|
||||
int aez = (a->as_int == 0);
|
||||
int lea = (l->as_int == a->as_int);
|
||||
|
||||
if (lea && lez)
|
||||
return SUBMVREF_LEFT_ABOVE_ZED;
|
||||
|
||||
if (lea)
|
||||
return SUBMVREF_LEFT_ABOVE_SAME;
|
||||
|
||||
if (aez)
|
||||
return SUBMVREF_ABOVE_ZED;
|
||||
|
||||
if (lez)
|
||||
return SUBMVREF_LEFT_ZED;
|
||||
|
||||
return SUBMVREF_NORMAL;
|
||||
}
|
||||
|
||||
static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25};
|
||||
|
||||
const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] =
|
||||
{
|
||||
{ 147, 136, 18 },
|
||||
{ 106, 145, 1 },
|
||||
{ 179, 121, 1 },
|
||||
{ 223, 1 , 34 },
|
||||
{ 208, 1 , 1 }
|
||||
};
|
||||
|
||||
|
||||
|
||||
const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] =
|
||||
{
|
||||
{
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
1, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
2, 2, 3, 3,
|
||||
2, 2, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 2, 3,
|
||||
4, 5, 6, 7,
|
||||
8, 9, 10, 11,
|
||||
12, 13, 14, 15,
|
||||
}
|
||||
};
|
||||
|
||||
const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16};
|
||||
|
||||
const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150};
|
||||
|
||||
|
||||
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
|
||||
|
||||
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
|
||||
{
|
||||
-B_DC_PRED, 2, /* 0 = DC_NODE */
|
||||
-B_TM_PRED, 4, /* 1 = TM_NODE */
|
||||
-B_VE_PRED, 6, /* 2 = VE_NODE */
|
||||
8, 12, /* 3 = COM_NODE */
|
||||
-B_HE_PRED, 10, /* 4 = HE_NODE */
|
||||
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
|
||||
-B_LD_PRED, 14, /* 6 = LD_NODE */
|
||||
-B_VL_PRED, 16, /* 7 = VL_NODE */
|
||||
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
|
||||
};
|
||||
|
||||
/* Again, these trees use the same probability indices as their
|
||||
explicitly-programmed predecessors. */
|
||||
|
||||
const vp8_tree_index vp8_ymode_tree[8] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
4, 6,
|
||||
-V_PRED, -H_PRED,
|
||||
-TM_PRED, -B_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_kf_ymode_tree[8] =
|
||||
{
|
||||
-B_PRED, 2,
|
||||
4, 6,
|
||||
-DC_PRED, -V_PRED,
|
||||
-H_PRED, -TM_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_uv_mode_tree[6] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
-V_PRED, 4,
|
||||
-H_PRED, -TM_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_mbsplit_tree[6] =
|
||||
{
|
||||
-3, 2,
|
||||
-2, 4,
|
||||
-0, -1
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_mv_ref_tree[8] =
|
||||
{
|
||||
-ZEROMV, 2,
|
||||
-NEARESTMV, 4,
|
||||
-NEARMV, 6,
|
||||
-NEWMV, -SPLITMV
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_sub_mv_ref_tree[6] =
|
||||
{
|
||||
-LEFT4X4, 2,
|
||||
-ABOVE4X4, 4,
|
||||
-ZERO4X4, -NEW4X4
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_small_mvtree [14] =
|
||||
{
|
||||
2, 8,
|
||||
4, 6,
|
||||
-0, -1,
|
||||
-2, -3,
|
||||
10, 12,
|
||||
-4, -5,
|
||||
-6, -7
|
||||
};
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x)
|
||||
{
|
||||
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
|
||||
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
|
||||
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
|
||||
}
|
||||
|
||||
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
|
||||
{
|
||||
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
|
||||
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPYMODE_H_
|
||||
#define VP8_COMMON_ENTROPYMODE_H_
|
||||
|
||||
#include "onyxc_int.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SUBMVREF_NORMAL,
|
||||
SUBMVREF_LEFT_ZED,
|
||||
SUBMVREF_ABOVE_ZED,
|
||||
SUBMVREF_LEFT_ABOVE_SAME,
|
||||
SUBMVREF_LEFT_ABOVE_ZED
|
||||
} sumvfref_t;
|
||||
|
||||
typedef int vp8_mbsplit[16];
|
||||
|
||||
#define VP8_NUMMBSPLITS 4
|
||||
|
||||
extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
|
||||
|
||||
extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
|
||||
|
||||
extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
|
||||
|
||||
extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
|
||||
#define SUBMVREF_COUNT 5
|
||||
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];
|
||||
|
||||
|
||||
extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];
|
||||
|
||||
|
||||
extern const vp8_tree_index vp8_bmode_tree[];
|
||||
|
||||
extern const vp8_tree_index vp8_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_kf_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_uv_mode_tree[];
|
||||
|
||||
extern const vp8_tree_index vp8_mbsplit_tree[];
|
||||
extern const vp8_tree_index vp8_mv_ref_tree[];
|
||||
extern const vp8_tree_index vp8_sub_mv_ref_tree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES];
|
||||
extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES];
|
||||
extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES];
|
||||
extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES];
|
||||
extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS];
|
||||
|
||||
/* Inter mode values do not start at zero */
|
||||
|
||||
extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS];
|
||||
extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
|
||||
|
||||
extern const vp8_tree_index vp8_small_mvtree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_small_mvencodings[8];
|
||||
|
||||
/* Key frame default mode probs */
|
||||
extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
|
||||
[VP8_BINTRAMODES-1];
|
||||
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
|
||||
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x);
|
||||
void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
|
||||
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ENTROPYMODE_H_
|
|
@ -1,49 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropymv.h"
|
||||
|
||||
const MV_CONTEXT vp8_mv_update_probs[2] =
|
||||
{
|
||||
{{
|
||||
237,
|
||||
246,
|
||||
253, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 250, 250, 252, 254, 254
|
||||
}},
|
||||
{{
|
||||
231,
|
||||
243,
|
||||
245, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 251, 251, 254, 254, 254
|
||||
}}
|
||||
};
|
||||
const MV_CONTEXT vp8_default_mv_context[2] =
|
||||
{
|
||||
{{
|
||||
/* row */
|
||||
162, /* is short */
|
||||
128, /* sign */
|
||||
225, 146, 172, 147, 214, 39, 156, /* short tree */
|
||||
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
|
||||
}},
|
||||
|
||||
|
||||
|
||||
{{
|
||||
/* same for column */
|
||||
164, /* is short */
|
||||
128,
|
||||
204, 170, 119, 235, 140, 230, 228,
|
||||
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
|
||||
|
||||
}}
|
||||
};
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPYMV_H_
|
||||
#define VP8_COMMON_ENTROPYMV_H_
|
||||
|
||||
#include "treecoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum
|
||||
{
|
||||
mv_max = 1023, /* max absolute value of a MV component */
|
||||
MVvals = (2 * mv_max) + 1, /* # possible values "" */
|
||||
mvfp_max = 255, /* max absolute value of a full pixel MV component */
|
||||
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
|
||||
|
||||
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
|
||||
mvnum_short = 8, /* magnitudes 0 through 7 */
|
||||
|
||||
/* probability offsets for coding each MV component */
|
||||
|
||||
mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
|
||||
MVPsign, /* sign for non-zero */
|
||||
MVPshort, /* 8 short values = 7-position tree */
|
||||
|
||||
MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
|
||||
MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
|
||||
};
|
||||
|
||||
typedef struct mv_context
|
||||
{
|
||||
vp8_prob prob[MVPcount]; /* often come in row, col pairs */
|
||||
} MV_CONTEXT;
|
||||
|
||||
extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ENTROPYMV_H_
|
|
@ -1,188 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "extend.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
|
||||
static void copy_and_extend_plane
|
||||
(
|
||||
unsigned char *s, /* source */
|
||||
int sp, /* source pitch */
|
||||
unsigned char *d, /* destination */
|
||||
int dp, /* destination pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
int el, /* extend left border */
|
||||
int eb, /* extend bottom border */
|
||||
int er /* extend right border */
|
||||
)
|
||||
{
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
int linesize;
|
||||
|
||||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = d - el;
|
||||
dest_ptr2 = d + w;
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
{
|
||||
memset(dest_ptr1, src_ptr1[0], el);
|
||||
memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
|
||||
/* Now copy the top and bottom lines into each line of the respective
|
||||
* borders
|
||||
*/
|
||||
src_ptr1 = d - el;
|
||||
src_ptr2 = d + dp * (h - 1) - el;
|
||||
dest_ptr1 = d + dp * (-et) - el;
|
||||
dest_ptr2 = d + dp * (h) - el;
|
||||
linesize = el + er + w;
|
||||
|
||||
for (i = 0; i < et; i++)
|
||||
{
|
||||
memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < eb; i++)
|
||||
{
|
||||
memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_height, src->y_width,
|
||||
et, el, eb, er);
|
||||
|
||||
et = dst->border >> 1;
|
||||
el = dst->border >> 1;
|
||||
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
|
||||
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer, src->uv_stride,
|
||||
dst->u_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer, src->uv_stride,
|
||||
dst->v_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst,
|
||||
int srcy, int srcx,
|
||||
int srch, int srcw)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
int src_y_offset = srcy * src->y_stride + srcx;
|
||||
int dst_y_offset = srcy * dst->y_stride + srcx;
|
||||
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
|
||||
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
|
||||
|
||||
/* If the side is not touching the bounder then don't extend. */
|
||||
if (srcy)
|
||||
et = 0;
|
||||
if (srcx)
|
||||
el = 0;
|
||||
if (srcy + srch != src->y_height)
|
||||
eb = 0;
|
||||
if (srcx + srcw != src->y_width)
|
||||
er = 0;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer + src_y_offset,
|
||||
src->y_stride,
|
||||
dst->y_buffer + dst_y_offset,
|
||||
dst->y_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
|
||||
et = (et + 1) >> 1;
|
||||
el = (el + 1) >> 1;
|
||||
eb = (eb + 1) >> 1;
|
||||
er = (er + 1) >> 1;
|
||||
srch = (srch + 1) >> 1;
|
||||
srcw = (srcw + 1) >> 1;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->u_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->v_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
/* note the extension is only for the last row, for intra prediction purpose */
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
|
||||
unsigned char *YPtr,
|
||||
unsigned char *UPtr,
|
||||
unsigned char *VPtr)
|
||||
{
|
||||
int i;
|
||||
|
||||
YPtr += ybf->y_stride * 14;
|
||||
UPtr += ybf->uv_stride * 6;
|
||||
VPtr += ybf->uv_stride * 6;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
|
||||
YPtr += ybf->y_stride;
|
||||
UPtr += ybf->uv_stride;
|
||||
VPtr += ybf->uv_stride;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_EXTEND_H_
|
||||
#define VP8_COMMON_EXTEND_H_
|
||||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst);
|
||||
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst,
|
||||
int srcy, int srcx,
|
||||
int srch, int srcw);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_EXTEND_H_
|
|
@ -1,493 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "filter.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
|
||||
{
|
||||
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
static void filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = (unsigned char)Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Start next row */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void filter_block2d
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int output_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter
|
||||
)
|
||||
{
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_stride : Stride of source block.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : INT32 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the horizontal direction to produce the filtered output
|
||||
* block. Used to implement first-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_stride,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : INT32 *src_ptr : Pointer to source block.
|
||||
* UINT32 dst_pitch : Destination block pitch.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the vertical direction to produce the filtered output
|
||||
* block. Used to implement second-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
dst_ptr += dst_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pitch : Stride of source block.
|
||||
* UINT32 dst_pitch : Stride of destination block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
* INT32 Width : Block width
|
||||
* INT32 Height : Block height
|
||||
*
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : 2-D filters an input block by applying a 2-tap
|
||||
* bi-linear filter horizontally followed by a 2-tap
|
||||
* bi-linear filter vertically on the result.
|
||||
*
|
||||
* SPECIAL NOTES : The largest block size can be handled here is 16x16
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
unsigned char temp1[16];
|
||||
unsigned char temp2[16];
|
||||
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if (temp1[i] != temp2[i])
|
||||
{
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_FILTER_H_
|
||||
#define VP8_COMMON_FILTER_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_FILTER_H_
|
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "findnearmv.h"
|
||||
|
||||
const unsigned char vp8_mbsplit_offset[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
};
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest,
|
||||
int_mv *nearby,
|
||||
int_mv *best_mv,
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
const MODE_INFO *above = here - xd->mode_info_stride;
|
||||
const MODE_INFO *left = here - 1;
|
||||
const MODE_INFO *aboveleft = above - 1;
|
||||
int_mv near_mvs[4];
|
||||
int_mv *mv = near_mvs;
|
||||
int *cntx = cnt;
|
||||
enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
|
||||
|
||||
/* Zero accumulators */
|
||||
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
|
||||
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
|
||||
|
||||
/* Process above */
|
||||
if (above->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (above->mbmi.mv.as_int)
|
||||
{
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 2;
|
||||
}
|
||||
|
||||
/* Process left */
|
||||
if (left->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (left->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 2;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 2;
|
||||
}
|
||||
|
||||
/* Process above left */
|
||||
if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (aboveleft->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 1;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 1;
|
||||
}
|
||||
|
||||
/* If we have three distinct MV's ... */
|
||||
if (cnt[CNT_SPLITMV])
|
||||
{
|
||||
/* See if above-left MV can be merged with NEAREST */
|
||||
if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
|
||||
cnt[CNT_NEAREST] += 1;
|
||||
}
|
||||
|
||||
cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
|
||||
+ (left->mbmi.mode == SPLITMV)) * 2
|
||||
+ (aboveleft->mbmi.mode == SPLITMV);
|
||||
|
||||
/* Swap near and nearest if necessary */
|
||||
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
|
||||
{
|
||||
int tmp;
|
||||
tmp = cnt[CNT_NEAREST];
|
||||
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
|
||||
cnt[CNT_NEAR] = tmp;
|
||||
tmp = near_mvs[CNT_NEAREST].as_int;
|
||||
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
|
||||
near_mvs[CNT_NEAR].as_int = tmp;
|
||||
}
|
||||
|
||||
/* Use near_mvs[0] to store the "best" MV */
|
||||
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
|
||||
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
|
||||
|
||||
/* Set up return values */
|
||||
best_mv->as_int = near_mvs[0].as_int;
|
||||
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
|
||||
nearby->as_int = near_mvs[CNT_NEAR].as_int;
|
||||
}
|
||||
|
||||
|
||||
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd)
|
||||
{
|
||||
inv->as_mv.row = src->as_mv.row * -1;
|
||||
inv->as_mv.col = src->as_mv.col * -1;
|
||||
vp8_clamp_mv2(inv, xd);
|
||||
vp8_clamp_mv2(src, xd);
|
||||
}
|
||||
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
int sign_bias = ref_frame_sign_bias[refframe];
|
||||
|
||||
vp8_find_near_mvs(xd,
|
||||
here,
|
||||
&mode_mv_sb[sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV],
|
||||
&best_mv_sb[sign_bias],
|
||||
cnt,
|
||||
refframe,
|
||||
ref_frame_sign_bias);
|
||||
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARESTMV], xd);
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV], xd);
|
||||
invert_and_clamp_mvs(&best_mv_sb[!sign_bias],
|
||||
&best_mv_sb[sign_bias], xd);
|
||||
|
||||
return sign_bias;
|
||||
}
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
)
|
||||
{
|
||||
p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
|
||||
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
|
||||
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
|
||||
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
|
||||
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
|
||||
return p;
|
||||
}
|
||||
|
|
@ -1,194 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_FINDNEARMV_H_
|
||||
#define VP8_COMMON_FINDNEARMV_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "mv.h"
|
||||
#include "blockd.h"
|
||||
#include "modecont.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
|
||||
int_mv *mvp, const int *ref_frame_sign_bias)
|
||||
{
|
||||
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
|
||||
{
|
||||
mvp->as_mv.row *= -1;
|
||||
mvp->as_mv.col *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
}
|
||||
|
||||
static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge, int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
|
||||
mb_to_left_edge : mv->as_mv.col;
|
||||
mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
|
||||
mb_to_right_edge : mv->as_mv.col;
|
||||
mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
|
||||
mb_to_top_edge : mv->as_mv.row;
|
||||
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
|
||||
mb_to_bottom_edge : mv->as_mv.row;
|
||||
}
|
||||
static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge,
|
||||
int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
unsigned int need_to_clamp;
|
||||
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
|
||||
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
|
||||
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
|
||||
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
|
||||
return need_to_clamp;
|
||||
}
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest, int_mv *nearby, int_mv *best,
|
||||
int near_mv_ref_cts[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
);
|
||||
|
||||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
|
||||
static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->mv.as_int;
|
||||
}
|
||||
|
||||
static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + (b - 4))->mv.as_int;
|
||||
}
|
||||
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 3)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->as_mode;
|
||||
}
|
||||
|
||||
static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 12)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->as_mode;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_FINDNEARMV_H_
|
|
@ -1,104 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#if ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#endif
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "vp8/common/systemdependent.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
#include <unistd.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
#elif defined(__OS2__)
|
||||
#define INCL_DOS
|
||||
#define INCL_DOSSPINLOCK
|
||||
#include <os2.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
static int get_cpu_count()
|
||||
{
|
||||
int core_count = 16;
|
||||
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
#if defined(_SC_NPROCESSORS_ONLN)
|
||||
core_count = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#elif defined(_SC_NPROC_ONLN)
|
||||
core_count = sysconf(_SC_NPROC_ONLN);
|
||||
#endif
|
||||
#elif defined(_WIN32)
|
||||
{
|
||||
#if _WIN32_WINNT >= 0x0501
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetNativeSystemInfo(&sysinfo);
|
||||
#else
|
||||
PGNSI pGNSI;
|
||||
SYSTEM_INFO sysinfo;
|
||||
|
||||
/* Call GetNativeSystemInfo if supported or
|
||||
* GetSystemInfo otherwise. */
|
||||
|
||||
pGNSI = (PGNSI) GetProcAddress(
|
||||
GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
|
||||
if (pGNSI != NULL)
|
||||
pGNSI(&sysinfo);
|
||||
else
|
||||
GetSystemInfo(&sysinfo);
|
||||
#endif
|
||||
|
||||
core_count = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
#elif defined(__OS2__)
|
||||
{
|
||||
ULONG proc_id;
|
||||
ULONG status;
|
||||
|
||||
core_count = 0;
|
||||
for (proc_id = 1; ; proc_id++)
|
||||
{
|
||||
if (DosGetProcessorStatus(proc_id, &status))
|
||||
break;
|
||||
|
||||
if (status == PROC_ONLINE)
|
||||
core_count++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* other platforms */
|
||||
#endif
|
||||
|
||||
return core_count > 0 ? core_count : 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8_clear_system_state_c() {};
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
ctx->processor_core_count = get_cpu_count();
|
||||
#endif /* CONFIG_MULTITHREAD */
|
||||
|
||||
#if ARCH_ARM
|
||||
ctx->cpu_caps = arm_cpu_caps();
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
ctx->cpu_caps = x86_simd_caps();
|
||||
#endif
|
||||
}
|
|
@ -1,51 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_HEADER_H_
|
||||
#define VP8_COMMON_HEADER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* 24 bits total */
|
||||
typedef struct
|
||||
{
|
||||
unsigned int type: 1;
|
||||
unsigned int version: 3;
|
||||
unsigned int show_frame: 1;
|
||||
|
||||
/* Allow 2^20 bytes = 8 megabits for first partition */
|
||||
|
||||
unsigned int first_partition_length_in_bytes: 19;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
unsigned int frame_number;
|
||||
unsigned int update_gold: 1;
|
||||
unsigned int uses_gold: 1;
|
||||
unsigned int update_last: 1;
|
||||
unsigned int uses_last: 1;
|
||||
#endif
|
||||
|
||||
} VP8_HEADER;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
#define VP8_HEADER_SIZE 8
|
||||
#else
|
||||
#define VP8_HEADER_SIZE 3
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_HEADER_H_
|
|
@ -1,90 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_dequant_idct_add_c(short *input, short *dq,
|
||||
unsigned char *dest, int stride);
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
|
||||
void vp8_dequant_idct_add_y_block_c
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dst, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
dst += 4*stride - 16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_c
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dstu, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstu += 4;
|
||||
}
|
||||
|
||||
dstu += 4*stride - 8;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dstv, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
dstv += 4*stride - 8;
|
||||
}
|
||||
}
|
|
@ -1,205 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Notes:
|
||||
*
|
||||
* This implementation makes use of 16 bit fixed point verio of two multiply
|
||||
* constants:
|
||||
* 1. sqrt(2) * cos (pi/8)
|
||||
* 2. sqrt(2) * sin (pi/8)
|
||||
* Becuase the first constant is bigger than 1, to maintain the same 16 bit
|
||||
* fixed point precision as the second one, we use a trick of
|
||||
* x * a = x + x*(a-1)
|
||||
* so
|
||||
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
**************************************************************************/
|
||||
static const int cospi8sqrt2minus1 = 20091;
|
||||
static const int sinpi8sqrt2 = 35468;
|
||||
|
||||
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int i;
|
||||
int r, c;
|
||||
int a1, b1, c1, d1;
|
||||
short output[16];
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
int temp1, temp2;
|
||||
int shortpitch = 4;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[8];
|
||||
b1 = ip[0] - ip[8];
|
||||
|
||||
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
op[shortpitch*0] = a1 + d1;
|
||||
op[shortpitch*3] = a1 - d1;
|
||||
|
||||
op[shortpitch*1] = b1 + c1;
|
||||
op[shortpitch*2] = b1 - c1;
|
||||
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[2];
|
||||
b1 = ip[0] - ip[2];
|
||||
|
||||
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
|
||||
op[0] = (a1 + d1 + 4) >> 3;
|
||||
op[3] = (a1 - d1 + 4) >> 3;
|
||||
|
||||
op[1] = (b1 + c1 + 4) >> 3;
|
||||
op[2] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
ip += shortpitch;
|
||||
op += shortpitch;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
int a = ip[c] + pred_ptr[c] ;
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dst_ptr[c] = (unsigned char) a ;
|
||||
}
|
||||
ip += 4;
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int a1 = ((input_dc + 4) >> 3);
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
int a = a1 + pred_ptr[c] ;
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dst_ptr[c] = (unsigned char) a ;
|
||||
}
|
||||
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
short output[16];
|
||||
int i;
|
||||
int a1, b1, c1, d1;
|
||||
int a2, b2, c2, d2;
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[4] = c1 + d1;
|
||||
op[8] = a1 - b1;
|
||||
op[12] = d1 - c1;
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[3];
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3];
|
||||
|
||||
a2 = a1 + b1;
|
||||
b2 = c1 + d1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
|
||||
op[0] = (a2 + 3) >> 3;
|
||||
op[1] = (b2 + 3) >> 3;
|
||||
op[2] = (c2 + 3) >> 3;
|
||||
op[3] = (d2 + 3) >> 3;
|
||||
|
||||
ip += 4;
|
||||
op += 4;
|
||||
}
|
||||
|
||||
for(i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
int i;
|
||||
int a1;
|
||||
|
||||
a1 = ((input[0] + 3) >> 3);
|
||||
for(i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = a1;
|
||||
}
|
||||
}
|
|
@ -1,70 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_INVTRANS_H_
|
||||
#define VP8_COMMON_INVTRANS_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static void eob_adjust(char *eobs, short *diff)
|
||||
{
|
||||
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
|
||||
int js;
|
||||
for(js = 0; js < 16; js++)
|
||||
{
|
||||
if((eobs[js] == 0) && (diff[0] != 0))
|
||||
eobs[js]++;
|
||||
diff+=16;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd)
|
||||
{
|
||||
short *DQC = xd->dequant_y1;
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
/* do 2nd order transform on the dc block */
|
||||
if (xd->eobs[24] > 1)
|
||||
{
|
||||
vp8_short_inv_walsh4x4
|
||||
(&xd->block[24].dqcoeff[0], xd->qcoeff);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_short_inv_walsh4x4_1
|
||||
(&xd->block[24].dqcoeff[0], xd->qcoeff);
|
||||
}
|
||||
eob_adjust(xd->eobs, xd->qcoeff);
|
||||
|
||||
DQC = xd->dequant_y1_dc;
|
||||
}
|
||||
vp8_dequant_idct_add_y_block
|
||||
(xd->qcoeff, DQC,
|
||||
xd->dst.y_buffer,
|
||||
xd->dst.y_stride, xd->eobs);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_INVTRANS_H_
|
|
@ -1,113 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_LOOPFILTER_H_
|
||||
#define VP8_COMMON_LOOPFILTER_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
/* fraction of total macroblock rows to be used in fast filter level picking */
|
||||
/* has to be > 2 */
|
||||
#define PARTIAL_FRAME_FRACTION 8
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NORMAL_LOOPFILTER = 0,
|
||||
SIMPLE_LOOPFILTER = 1
|
||||
} LOOPFILTERTYPE;
|
||||
|
||||
#if ARCH_ARM
|
||||
#define SIMD_WIDTH 1
|
||||
#else
|
||||
#define SIMD_WIDTH 16
|
||||
#endif
|
||||
|
||||
/* Need to align this structure so when it is declared and
|
||||
* passed it can be loaded into vector registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||
unsigned char lvl[4][4][4];
|
||||
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||
unsigned char mode_lf_lut[10];
|
||||
} loop_filter_info_n;
|
||||
|
||||
typedef struct loop_filter_info
|
||||
{
|
||||
const unsigned char * mblim;
|
||||
const unsigned char * blim;
|
||||
const unsigned char * lim;
|
||||
const unsigned char * hev_thr;
|
||||
} loop_filter_info;
|
||||
|
||||
|
||||
typedef void loop_filter_uvfunction
|
||||
(
|
||||
unsigned char *u, /* source pointer */
|
||||
int p, /* pitch */
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
unsigned char *v
|
||||
);
|
||||
|
||||
/* assorted loopfilter functions which get used elsewhere */
|
||||
struct VP8Common;
|
||||
struct macroblockd;
|
||||
struct modeinfo;
|
||||
|
||||
void vp8_loop_filter_init(struct VP8Common *cm);
|
||||
|
||||
void vp8_loop_filter_frame_init(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
|
||||
int frame_type);
|
||||
|
||||
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl);
|
||||
|
||||
void vp8_loop_filter_row_normal(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
|
||||
void vp8_loop_filter_row_simple(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_LOOPFILTER_H_
|
|
@ -1,430 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
static signed char vp8_signed_char_clamp(int t)
|
||||
{
|
||||
t = (t < -128 ? -128 : t);
|
||||
t = (t > 127 ? 127 : t);
|
||||
return (signed char) t;
|
||||
}
|
||||
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_filter_mask(uc limit, uc blimit,
|
||||
uc p3, uc p2, uc p1, uc p0,
|
||||
uc q0, uc q1, uc q2, uc q3)
|
||||
{
|
||||
signed char mask = 0;
|
||||
mask |= (abs(p3 - p2) > limit);
|
||||
mask |= (abs(p2 - p1) > limit);
|
||||
mask |= (abs(p1 - p0) > limit);
|
||||
mask |= (abs(q1 - q0) > limit);
|
||||
mask |= (abs(q2 - q1) > limit);
|
||||
mask |= (abs(q3 - q2) > limit);
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
|
||||
return mask - 1;
|
||||
}
|
||||
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
hev |= (abs(q1 - q0) > thresh) * -1;
|
||||
return hev;
|
||||
}
|
||||
|
||||
static void vp8_filter(signed char mask, uc hev, uc *op1,
|
||||
uc *op0, uc *oq0, uc *oq1)
|
||||
|
||||
{
|
||||
signed char ps0, qs0;
|
||||
signed char ps1, qs1;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char u;
|
||||
|
||||
ps1 = (signed char) * op1 ^ 0x80;
|
||||
ps0 = (signed char) * op0 ^ 0x80;
|
||||
qs0 = (signed char) * oq0 ^ 0x80;
|
||||
qs1 = (signed char) * oq1 ^ 0x80;
|
||||
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value &= hev;
|
||||
|
||||
/* inner taps */
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3
|
||||
* if it equals 4 we'll set to adjust by -1 to account for the fact
|
||||
* we'd round 3 the other way
|
||||
*/
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
u = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
filter_value = Filter1;
|
||||
|
||||
/* outer tap adjustments */
|
||||
filter_value += 1;
|
||||
filter_value >>= 1;
|
||||
filter_value &= ~hev;
|
||||
|
||||
u = vp8_signed_char_clamp(qs1 - filter_value);
|
||||
*oq1 = u ^ 0x80;
|
||||
u = vp8_signed_char_clamp(ps1 + filter_value);
|
||||
*op1 = u ^ 0x80;
|
||||
|
||||
}
|
||||
void vp8_loop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p, /* pitch */
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
|
||||
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
static void vp8_mbfilter(signed char mask, uc hev,
|
||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||
{
|
||||
signed char s, u;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char ps2 = (signed char) * op2 ^ 0x80;
|
||||
signed char ps1 = (signed char) * op1 ^ 0x80;
|
||||
signed char ps0 = (signed char) * op0 ^ 0x80;
|
||||
signed char qs0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char qs1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char qs2 = (signed char) * oq2 ^ 0x80;
|
||||
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
|
||||
Filter2 = filter_value;
|
||||
Filter2 &= hev;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
|
||||
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
|
||||
|
||||
/* only apply wider filter if not high edge variance */
|
||||
filter_value &= ~hev;
|
||||
Filter2 = filter_value;
|
||||
|
||||
/* roughly 3/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
|
||||
s = vp8_signed_char_clamp(qs0 - u);
|
||||
*oq0 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps0 + u);
|
||||
*op0 = s ^ 0x80;
|
||||
|
||||
/* roughly 2/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
|
||||
s = vp8_signed_char_clamp(qs1 - u);
|
||||
*oq1 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps1 + u);
|
||||
*op1 = s ^ 0x80;
|
||||
|
||||
/* roughly 1/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
|
||||
s = vp8_signed_char_clamp(qs2 - u);
|
||||
*oq2 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps2 + u);
|
||||
*op2 = s ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
||||
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
|
||||
}
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||
return mask;
|
||||
}
|
||||
|
||||
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
{
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char p1 = (signed char) * op1 ^ 0x80;
|
||||
signed char p0 = (signed char) * op0 ^ 0x80;
|
||||
signed char q0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char q1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char u;
|
||||
|
||||
filter_value = vp8_signed_char_clamp(p1 - q1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
|
||||
filter_value &= mask;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter1 >>= 3;
|
||||
u = vp8_signed_char_clamp(q0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(p0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
}
|
||||
while (++i < 16);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
}
|
||||
while (++i < 16);
|
||||
|
||||
}
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||
}
|
|
@ -1,68 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "blockd.h"
|
||||
|
||||
void vp8_setup_block_dptrs(MACROBLOCKD *x)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 25; r++)
|
||||
{
|
||||
x->block[r].qcoeff = x->qcoeff + r * 16;
|
||||
x->block[r].dqcoeff = x->dqcoeff + r * 16;
|
||||
x->block[r].eob = x->eobs + r;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_block_doffsets(MACROBLOCKD *x)
|
||||
{
|
||||
int block;
|
||||
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
x->block[block].offset =
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
x->block[block+4].offset =
|
||||
x->block[block].offset =
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
|
||||
}
|
||||
}
|
|
@ -1,386 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/* MFQE: Multiframe Quality Enhancement
|
||||
* In rate limited situations keyframes may cause significant visual artifacts
|
||||
* commonly referred to as "popping." This file implements a postproccesing
|
||||
* algorithm which blends data from the preceeding frame when there is no
|
||||
* motion and the q from the previous frame is lower which indicates that it is
|
||||
* higher quality.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8/common/postproc.h"
|
||||
#include "vpx_dsp/variance.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static void filter_by_weight(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int block_size, int src_weight)
|
||||
{
|
||||
int dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int rounding_bit = 1 << (MFQE_PRECISION - 1);
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < block_size; r++)
|
||||
{
|
||||
for (c = 0; c < block_size; c++)
|
||||
{
|
||||
dst[c] = (src[c] * src_weight +
|
||||
dst[c] * dst_weight +
|
||||
rounding_bit) >> MFQE_PRECISION;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
|
||||
}
|
||||
|
||||
static void apply_ifactor(unsigned char *y_src,
|
||||
int y_src_stride,
|
||||
unsigned char *y_dst,
|
||||
int y_dst_stride,
|
||||
unsigned char *u_src,
|
||||
unsigned char *v_src,
|
||||
int uv_src_stride,
|
||||
unsigned char *u_dst,
|
||||
unsigned char *v_dst,
|
||||
int uv_dst_stride,
|
||||
int block_size,
|
||||
int src_weight)
|
||||
{
|
||||
if (block_size == 16)
|
||||
{
|
||||
vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
|
||||
vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
|
||||
vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
|
||||
}
|
||||
else /* if (block_size == 8) */
|
||||
{
|
||||
vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
|
||||
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
|
||||
vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int int_sqrt(unsigned int x)
|
||||
{
|
||||
unsigned int y = x;
|
||||
unsigned int guess;
|
||||
int p = 1;
|
||||
while (y>>=1) p++;
|
||||
p>>=1;
|
||||
|
||||
guess=0;
|
||||
while (p>=0)
|
||||
{
|
||||
guess |= (1<<p);
|
||||
if (x<guess*guess)
|
||||
guess -= (1<<p);
|
||||
p--;
|
||||
}
|
||||
/* choose between guess or guess+1 */
|
||||
return guess+(guess*guess+guess+1<=x);
|
||||
}
|
||||
|
||||
#define USE_SSD
|
||||
static void multiframe_quality_enhance_block
|
||||
(
|
||||
int blksize, /* Currently only values supported are 16, 8 */
|
||||
int qcurr,
|
||||
int qprev,
|
||||
unsigned char *y,
|
||||
unsigned char *u,
|
||||
unsigned char *v,
|
||||
int y_stride,
|
||||
int uv_stride,
|
||||
unsigned char *yd,
|
||||
unsigned char *ud,
|
||||
unsigned char *vd,
|
||||
int yd_stride,
|
||||
int uvd_stride
|
||||
)
|
||||
{
|
||||
static const unsigned char VP8_ZEROS[16]=
|
||||
{
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
};
|
||||
int uvblksize = blksize >> 1;
|
||||
int qdiff = qcurr - qprev;
|
||||
|
||||
int i;
|
||||
unsigned char *up;
|
||||
unsigned char *udp;
|
||||
unsigned char *vp;
|
||||
unsigned char *vdp;
|
||||
|
||||
unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
|
||||
|
||||
if (blksize == 16)
|
||||
{
|
||||
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 128)>>8;
|
||||
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 32)>>6;
|
||||
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 32)>>6;
|
||||
#else
|
||||
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
|
||||
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
|
||||
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
|
||||
#endif
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 32)>>6;
|
||||
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 8)>>4;
|
||||
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 8)>>4;
|
||||
#else
|
||||
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
|
||||
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
|
||||
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
actrisk = (actd > act * 5);
|
||||
|
||||
/* thr = qdiff/16 + log2(act) + log4(qprev) */
|
||||
thr = (qdiff >> 4);
|
||||
while (actd >>= 1) thr++;
|
||||
while (qprev >>= 2) thr++;
|
||||
|
||||
#ifdef USE_SSD
|
||||
thrsq = thr * thr;
|
||||
if (sad < thrsq &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
|
||||
#else
|
||||
if (sad < thr &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
2 * usad < thr && 2 * vsad < thr && !actrisk)
|
||||
#endif
|
||||
{
|
||||
int ifactor;
|
||||
#ifdef USE_SSD
|
||||
/* TODO: optimize this later to not need sqr root */
|
||||
sad = int_sqrt(sad);
|
||||
#endif
|
||||
ifactor = (sad << MFQE_PRECISION) / thr;
|
||||
ifactor >>= (qdiff >> 5);
|
||||
|
||||
if (ifactor)
|
||||
{
|
||||
apply_ifactor(y, y_stride, yd, yd_stride,
|
||||
u, v, uv_stride,
|
||||
ud, vd, uvd_stride,
|
||||
blksize, ifactor);
|
||||
}
|
||||
}
|
||||
else /* else implicitly copy from previous frame */
|
||||
{
|
||||
if (blksize == 16)
|
||||
{
|
||||
vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
|
||||
vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
|
||||
vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
|
||||
for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
|
||||
memcpy(udp, up, uvblksize);
|
||||
for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
|
||||
memcpy(vdp, vp, uvblksize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
|
||||
{
|
||||
if (mode_info_context->mbmi.mb_skip_coeff)
|
||||
map[0] = map[1] = map[2] = map[3] = 1;
|
||||
else if (mode_info_context->mbmi.mode==SPLITMV)
|
||||
{
|
||||
static int ndx[4][4] =
|
||||
{
|
||||
{0, 1, 4, 5},
|
||||
{2, 3, 6, 7},
|
||||
{8, 9, 12, 13},
|
||||
{10, 11, 14, 15}
|
||||
};
|
||||
int i, j;
|
||||
for (i=0; i<4; ++i)
|
||||
{
|
||||
map[i] = 1;
|
||||
for (j=0; j<4 && map[j]; ++j)
|
||||
map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
|
||||
mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
map[0] = map[1] = map[2] = map[3] =
|
||||
(mode_info_context->mbmi.mode > B_PRED &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
|
||||
}
|
||||
return (map[0]+map[1]+map[2]+map[3]);
|
||||
}
|
||||
|
||||
void vp8_multiframe_quality_enhance
|
||||
(
|
||||
VP8_COMMON *cm
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *show = cm->frame_to_show;
|
||||
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
|
||||
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
/* Point at base of Mb MODE_INFO list has motion vectors etc */
|
||||
const MODE_INFO *mode_info_context = cm->show_frame_mi;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int totmap, map[4];
|
||||
int qcurr = cm->base_qindex;
|
||||
int qprev = cm->postproc_state.last_base_qindex;
|
||||
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = show->y_buffer;
|
||||
u_ptr = show->u_buffer;
|
||||
v_ptr = show->v_buffer;
|
||||
yd_ptr = dest->y_buffer;
|
||||
ud_ptr = dest->u_buffer;
|
||||
vd_ptr = dest->v_buffer;
|
||||
|
||||
/* postprocess each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
/* if motion is high there will likely be no benefit */
|
||||
if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
|
||||
else totmap = (frame_type == KEY_FRAME ? 4 : 0);
|
||||
if (totmap)
|
||||
{
|
||||
if (totmap < 4)
|
||||
{
|
||||
int i, j;
|
||||
for (i=0; i<2; ++i)
|
||||
for (j=0; j<2; ++j)
|
||||
{
|
||||
if (map[i*2+j])
|
||||
{
|
||||
multiframe_quality_enhance_block(8, qcurr, qprev,
|
||||
y_ptr + 8*(i*show->y_stride+j),
|
||||
u_ptr + 4*(i*show->uv_stride+j),
|
||||
v_ptr + 4*(i*show->uv_stride+j),
|
||||
show->y_stride,
|
||||
show->uv_stride,
|
||||
yd_ptr + 8*(i*dest->y_stride+j),
|
||||
ud_ptr + 4*(i*dest->uv_stride+j),
|
||||
vd_ptr + 4*(i*dest->uv_stride+j),
|
||||
dest->y_stride,
|
||||
dest->uv_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* copy a 8x8 block */
|
||||
int k;
|
||||
unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
|
||||
unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
|
||||
unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
|
||||
unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
|
||||
vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
|
||||
yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
|
||||
for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
|
||||
vp += show->uv_stride, vdp += dest->uv_stride)
|
||||
{
|
||||
memcpy(udp, up, 4);
|
||||
memcpy(vdp, vp, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* totmap = 4 */
|
||||
{
|
||||
multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
|
||||
u_ptr, v_ptr,
|
||||
show->y_stride,
|
||||
show->uv_stride,
|
||||
yd_ptr, ud_ptr, vd_ptr,
|
||||
dest->y_stride,
|
||||
dest->uv_stride);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
|
||||
vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
|
||||
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
|
||||
}
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
yd_ptr += 16;
|
||||
ud_ptr += 8;
|
||||
vd_ptr += 8;
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
|
||||
u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
|
||||
ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
|
||||
mode_info_context++; /* Skip border mb */
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
void vp8_dequant_idct_add_dspr2(short *input, short *dq,
|
||||
unsigned char *dest, int stride)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
input[i] = dq[i] * input[i];
|
||||
}
|
||||
|
||||
vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
|
||||
|
||||
memset(input, 0, 32);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,88 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
|
||||
void vp8_dequant_idct_add_y_block_dspr2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
dst += 4 * stride - 16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_dspr2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstu += 4;
|
||||
}
|
||||
|
||||
dstu += 4 * stride - 8;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
dstv += 4 * stride - 8;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,369 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
#define CROP_WIDTH 256
|
||||
|
||||
/******************************************************************************
|
||||
* Notes:
|
||||
*
|
||||
* This implementation makes use of 16 bit fixed point version of two multiply
|
||||
* constants:
|
||||
* 1. sqrt(2) * cos (pi/8)
|
||||
* 2. sqrt(2) * sin (pi/8)
|
||||
* Since the first constant is bigger than 1, to maintain the same 16 bit
|
||||
* fixed point precision as the second one, we use a trick of
|
||||
* x * a = x + x*(a-1)
|
||||
* so
|
||||
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
****************************************************************************/
|
||||
extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
|
||||
static const int cospi8sqrt2minus1 = 20091;
|
||||
static const int sinpi8sqrt2 = 35468;
|
||||
|
||||
inline void prefetch_load_short(short *src)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"pref 0, 0(%[src]) \n\t"
|
||||
:
|
||||
: [src] "r" (src)
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int r, c;
|
||||
int a1, b1, c1, d1;
|
||||
short output[16];
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
int temp1, temp2;
|
||||
int shortpitch = 4;
|
||||
|
||||
int c2, d2;
|
||||
int temp3, temp4;
|
||||
unsigned char *cm = ff_cropTbl + CROP_WIDTH;
|
||||
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + 8);
|
||||
|
||||
/* first loop is unrolled */
|
||||
a1 = ip[0] + ip[8];
|
||||
b1 = ip[0] - ip[8];
|
||||
|
||||
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[0] = a1 + d1;
|
||||
op[12] = a1 - d1;
|
||||
op[4] = b1 + c1;
|
||||
op[8] = b1 - c1;
|
||||
|
||||
a1 = ip[1] + ip[9];
|
||||
b1 = ip[1] - ip[9];
|
||||
|
||||
op[1] = a1 + d2;
|
||||
op[13] = a1 - d2;
|
||||
op[5] = b1 + c2;
|
||||
op[9] = b1 - c2;
|
||||
|
||||
a1 = ip[2] + ip[10];
|
||||
b1 = ip[2] - ip[10];
|
||||
|
||||
temp1 = (ip[6] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[14] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[2] = a1 + d1;
|
||||
op[14] = a1 - d1;
|
||||
op[6] = b1 + c1;
|
||||
op[10] = b1 - c1;
|
||||
|
||||
a1 = ip[3] + ip[11];
|
||||
b1 = ip[3] - ip[11];
|
||||
|
||||
op[3] = a1 + d2;
|
||||
op[15] = a1 - d2;
|
||||
op[7] = b1 + c2;
|
||||
op[11] = b1 - c2;
|
||||
|
||||
ip = output;
|
||||
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + shortpitch);
|
||||
|
||||
/* second loop is unrolled */
|
||||
a1 = ip[0] + ip[2];
|
||||
b1 = ip[0] - ip[2];
|
||||
|
||||
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[0] = (a1 + d1 + 4) >> 3;
|
||||
op[3] = (a1 - d1 + 4) >> 3;
|
||||
op[1] = (b1 + c1 + 4) >> 3;
|
||||
op[2] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
a1 = ip[4] + ip[6];
|
||||
b1 = ip[4] - ip[6];
|
||||
|
||||
op[4] = (a1 + d2 + 4) >> 3;
|
||||
op[7] = (a1 - d2 + 4) >> 3;
|
||||
op[5] = (b1 + c2 + 4) >> 3;
|
||||
op[6] = (b1 - c2 + 4) >> 3;
|
||||
|
||||
a1 = ip[8] + ip[10];
|
||||
b1 = ip[8] - ip[10];
|
||||
|
||||
temp1 = (ip[9] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[11] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[8] = (a1 + d1 + 4) >> 3;
|
||||
op[11] = (a1 - d1 + 4) >> 3;
|
||||
op[9] = (b1 + c1 + 4) >> 3;
|
||||
op[10] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
a1 = ip[12] + ip[14];
|
||||
b1 = ip[12] - ip[14];
|
||||
|
||||
op[12] = (a1 + d2 + 4) >> 3;
|
||||
op[15] = (a1 - d2 + 4) >> 3;
|
||||
op[13] = (b1 + c2 + 4) >> 3;
|
||||
op[14] = (b1 - c2 + 4) >> 3;
|
||||
|
||||
ip = output;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
short a = ip[c] + pred_ptr[c] ;
|
||||
dst_ptr[c] = cm[a] ;
|
||||
}
|
||||
|
||||
ip += 4;
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride)
|
||||
{
|
||||
int a1;
|
||||
int i, absa1;
|
||||
int t2, vector_a1, vector_a;
|
||||
|
||||
/* a1 = ((input_dc + 4) >> 3); */
|
||||
__asm__ __volatile__ (
|
||||
"addi %[a1], %[input_dc], 4 \n\t"
|
||||
"sra %[a1], %[a1], 3 \n\t"
|
||||
: [a1] "=r" (a1)
|
||||
: [input_dc] "r" (input_dc)
|
||||
);
|
||||
|
||||
if (a1 < 0)
|
||||
{
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"abs %[absa1], %[a1] \n\t"
|
||||
"replv.qb %[vector_a1], %[absa1] \n\t"
|
||||
: [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
|
||||
: [a1] "r" (a1)
|
||||
);
|
||||
|
||||
/* use (a1 - predptr[c]) instead a1 + predptr[c] */
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
|
||||
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
|
||||
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"replv.qb %[vector_a1], %[a1] \n\t"
|
||||
: [vector_a1] "=r" (vector_a1)
|
||||
: [a1] "r" (a1)
|
||||
);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
|
||||
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
|
||||
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
short output[16];
|
||||
int i;
|
||||
int a1, b1, c1, d1;
|
||||
int a2, b2, c2, d2;
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
|
||||
prefetch_load_short(ip);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[4] = c1 + d1;
|
||||
op[8] = a1 - b1;
|
||||
op[12] = d1 - c1;
|
||||
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
prefetch_load_short(ip);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
a1 = ip[0] + ip[3] + 3;
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3] + 3;
|
||||
|
||||
a2 = a1 + b1;
|
||||
b2 = d1 + c1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
|
||||
op[0] = a2 >> 3;
|
||||
op[1] = b2 >> 3;
|
||||
op[2] = c2 >> 3;
|
||||
op[3] = d2 >> 3;
|
||||
|
||||
ip += 4;
|
||||
op += 4;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
int a1;
|
||||
|
||||
a1 = ((input[0] + 3) >> 3);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sh %[a1], 0(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 32(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 64(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 96(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 128(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 160(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 192(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 224(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 256(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 288(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 320(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 352(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 384(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 416(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 448(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 480(%[mb_dqcoeff]) \n\t"
|
||||
|
||||
:
|
||||
: [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff)
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,121 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
inline void prefetch_load_int(unsigned char *src)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"pref 0, 0(%[src]) \n\t"
|
||||
:
|
||||
: [src] "r" (src)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
__inline void vp8_copy_mem16x16_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1, a2, a3;
|
||||
|
||||
for (r = 16; r--;)
|
||||
{
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"ulw %[a2], 8(%[src]) \n\t"
|
||||
"ulw %[a3], 12(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
"sw %[a2], 8(%[dst]) \n\t"
|
||||
"sw %[a3], 12(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1),
|
||||
[a2] "=&r" (a2), [a3] "=&r" (a3)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__inline void vp8_copy_mem8x8_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
for (r = 8; r--;)
|
||||
{
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__inline void vp8_copy_mem8x4_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
for (r = 4; r--;)
|
||||
{
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,911 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
DECLARE_ALIGNED(16, static const int8_t, vp8_bilinear_filters_msa[7][2]) =
|
||||
{
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
static const uint8_t vp8_mc_filt_mask_arr[16 * 3] =
|
||||
{
|
||||
/* 8 width cases */
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
|
||||
/* 4 width cases */
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
|
||||
/* 4 width cases */
|
||||
8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
|
||||
};
|
||||
|
||||
static void common_hz_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, mask;
|
||||
v16u8 filt0, vec0, vec1, res0, res1;
|
||||
v8u16 vec2, vec3, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
|
||||
SRARI_H2_UH(vec2, vec3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
|
||||
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hz_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16u8 vec0, vec1, vec2, vec3, filt0;
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
|
||||
v16i8 res0, res1, res2, res3;
|
||||
v8u16 vec4, vec5, vec6, vec7, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec4, vec5, vec6, vec7);
|
||||
SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT);
|
||||
PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
|
||||
res0, res1, res2, res3);
|
||||
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hz_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
else if (8 == height)
|
||||
{
|
||||
common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hz_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16u8 filt0;
|
||||
v16i8 src0, src1, src2, src3, mask;
|
||||
v8u16 vec0, vec1, vec2, vec3, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
|
||||
ST8x4_UB(src0, src1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hz_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
v16u8 filt0;
|
||||
v16i8 src0, src1, src2, src3, mask, out0, out1;
|
||||
v8u16 vec0, vec1, vec2, vec3, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
if (16 == height)
|
||||
{
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hz_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hz_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
|
||||
v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
||||
v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
loop_cnt = (height >> 2) - 1;
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src2, src4, src6);
|
||||
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
|
||||
VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
out0, out1, out2, out3);
|
||||
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
|
||||
out4, out5, out6, out7);
|
||||
SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT);
|
||||
SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(out0, out1, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out2, out3, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out4, out5, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out6, out7, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
for (; loop_cnt--;)
|
||||
{
|
||||
LD_SB4(src, src_stride, src0, src2, src4, src6);
|
||||
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
|
||||
VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
out0, out1, out2, out3);
|
||||
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
|
||||
out4, out5, out6, out7);
|
||||
SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT);
|
||||
SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(out0, out1, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out2, out3, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out4, out5, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out6, out7, dst);
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4;
|
||||
v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
|
||||
v16u8 filt0;
|
||||
v8i16 filt;
|
||||
v8u16 tmp0, tmp1;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
src += (5 * src_stride);
|
||||
|
||||
ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
|
||||
src10_r, src21_r, src32_r, src43_r);
|
||||
ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
|
||||
DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
|
||||
ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_vt_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
|
||||
v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
|
||||
v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v16u8 filt0;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
|
||||
src8 = LD_SB(src);
|
||||
src += src_stride;
|
||||
|
||||
ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
|
||||
src32_r, src43_r);
|
||||
ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
|
||||
src76_r, src87_r);
|
||||
ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
|
||||
src87_r, src76_r, src2110, src4332, src6554, src8776);
|
||||
DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
|
||||
ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
|
||||
ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
|
||||
}
|
||||
|
||||
static void common_vt_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
else if (8 == height)
|
||||
{
|
||||
common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
|
||||
v16i8 out0, out1;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
|
||||
ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_vt_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
|
||||
v16i8 out0, out1;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
src0 = LD_UB(src);
|
||||
src += src_stride;
|
||||
|
||||
for (loop_cnt = (height >> 3); loop_cnt--;)
|
||||
{
|
||||
LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
|
||||
src += (8 * src_stride);
|
||||
|
||||
ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
|
||||
vec0, vec1, vec2, vec3);
|
||||
ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
|
||||
vec4, vec5, vec6, vec7);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
src0 = src8;
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter,
|
||||
height);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16u8 src0, src1, src2, src3, src4;
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
src0 = LD_UB(src);
|
||||
src += src_stride;
|
||||
|
||||
for (loop_cnt = (height >> 2); loop_cnt--;)
|
||||
{
|
||||
LD_UB4(src, src_stride, src1, src2, src3, src4);
|
||||
src += (4 * src_stride);
|
||||
|
||||
ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
|
||||
ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
|
||||
DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp0, tmp1, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
|
||||
ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
|
||||
DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
|
||||
SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp2, tmp3, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp0, tmp1, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
|
||||
SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp2, tmp3, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
src0 = src4;
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, mask;
|
||||
v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1;
|
||||
v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
|
||||
|
||||
filt = LD_UH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
filt = LD_UH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
|
||||
hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
|
||||
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
|
||||
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
|
||||
v16i8 res0, res1, res2, res3;
|
||||
v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
|
||||
v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
|
||||
v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
|
||||
|
||||
filt = LD_UH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
filt = LD_UH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
src8 = LD_SB(src);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
|
||||
hz_out3, hz_out5, 8);
|
||||
hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6);
|
||||
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
|
||||
vec4, vec5, vec6, vec7);
|
||||
SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT);
|
||||
PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
|
||||
res0, res1, res2, res3);
|
||||
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert);
|
||||
}
|
||||
else if (8 == height)
|
||||
{
|
||||
common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
|
||||
v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
|
||||
v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_SH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h(filt, 0);
|
||||
filt = LD_SH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp0 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp1 = __msa_dotp_u_h(vec1, filt_vt);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp2 = __msa_dotp_u_h(vec2, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp3 = __msa_dotp_u_h(vec3, filt_vt);
|
||||
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_8x8mult_msa(uint8_t *RESTRICT src,
|
||||
int32_t src_stride,
|
||||
uint8_t *RESTRICT dst,
|
||||
int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
|
||||
v16u8 filt_hz, filt_vt, vec0;
|
||||
v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
|
||||
v8i16 filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_SH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h(filt, 0);
|
||||
filt = LD_SH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
src0 = LD_SB(src);
|
||||
src += src_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
|
||||
for (loop_cnt = (height >> 3); loop_cnt--;)
|
||||
{
|
||||
LD_SB4(src, src_stride, src1, src2, src3, src4);
|
||||
src += (4 * src_stride);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp1 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp2 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp3 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
LD_SB4(src, src_stride, src1, src2, src3, src4);
|
||||
src += (4 * src_stride);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp4 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
SRARI_H2_UH(tmp3, tmp4, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp5 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp6 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp7 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp8 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert, height);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
|
||||
v16u8 filt_hz, filt_vt, vec0, vec1;
|
||||
v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3;
|
||||
v8i16 filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
/* rearranging filter */
|
||||
filt = LD_SH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h(filt, 0);
|
||||
filt = LD_SH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB2(src, 8, src0, src1);
|
||||
src += src_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
|
||||
for (loop_cnt = (height >> 2); loop_cnt--;)
|
||||
{
|
||||
LD_SB4(src, src_stride, src0, src2, src4, src6);
|
||||
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
|
||||
src += (4 * src_stride);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
int32_t xoffset, int32_t yoffset,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride)
|
||||
{
|
||||
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
|
||||
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
|
||||
|
||||
if (yoffset)
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hv_2ht_2vt_4w_msa(src, src_stride, dst, dst_stride,
|
||||
h_filter, v_filter, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_vt_2t_4w_msa(src, src_stride, dst, dst_stride, v_filter, 4);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hz_2t_4w_msa(src, src_stride, dst, dst_stride, h_filter, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t tp0, tp1, tp2, tp3;
|
||||
|
||||
LW4(src, src_stride, tp0, tp1, tp2, tp3);
|
||||
SW4(tp0, tp1, tp2, tp3, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
int32_t xoffset, int32_t yoffset,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride)
|
||||
{
|
||||
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
|
||||
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
|
||||
|
||||
if (yoffset)
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride,
|
||||
h_filter, v_filter, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 4);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x4(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
int32_t xoffset, int32_t yoffset,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride)
|
||||
{
|
||||
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
|
||||
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
|
||||
|
||||
if (yoffset)
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride,
|
||||
h_filter, v_filter, 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 8);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
int32_t xoffset, int32_t yoffset,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride)
|
||||
{
|
||||
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
|
||||
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
|
||||
|
||||
if (yoffset)
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride,
|
||||
h_filter, v_filter, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_vt_2t_16w_msa(src, src_stride, dst, dst_stride, v_filter,
|
||||
16);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (xoffset)
|
||||
{
|
||||
common_hz_2t_16w_msa(src, src_stride, dst, dst_stride, h_filter,
|
||||
16);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,70 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static void copy_8x4_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
uint64_t src0, src1, src2, src3;
|
||||
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void copy_8x8_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
uint64_t src0, src1, src2, src3;
|
||||
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void copy_16x16_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
|
||||
|
||||
LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14,
|
||||
src15);
|
||||
|
||||
ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
|
||||
dst += (8 * dst_stride);
|
||||
ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst,
|
||||
dst_stride);
|
||||
}
|
||||
|
||||
void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
copy_16x16_msa(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
copy_8x8_msa(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
copy_8x4_msa(src, src_stride, dst, dst_stride);
|
||||
}
|
|
@ -1,457 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static const int32_t cospi8sqrt2minus1 = 20091;
|
||||
static const int32_t sinpi8sqrt2 = 35468;
|
||||
|
||||
#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 s4_m, s5_m, s6_m, s7_m; \
|
||||
\
|
||||
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
|
||||
ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \
|
||||
out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \
|
||||
out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \
|
||||
}
|
||||
|
||||
#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
|
||||
({ \
|
||||
v8i16 out_m; \
|
||||
v8i16 zero_m = { 0 }; \
|
||||
v4i32 tmp1_m, tmp2_m; \
|
||||
v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
\
|
||||
ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \
|
||||
tmp1_m >>= 16; \
|
||||
tmp2_m >>= 16; \
|
||||
tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \
|
||||
tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \
|
||||
out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \
|
||||
\
|
||||
out_m; \
|
||||
})
|
||||
|
||||
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 a1_m, b1_m, c1_m, d1_m; \
|
||||
v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v8i16 const_cospi8sqrt2minus1_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
|
||||
c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \
|
||||
c_tmp2_m = c_tmp2_m >> 1; \
|
||||
c_tmp2_m = in3 + c_tmp2_m; \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \
|
||||
d_tmp1_m = d_tmp1_m >> 1; \
|
||||
d_tmp1_m = in1 + d_tmp1_m; \
|
||||
d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v4i32 a1_m, b1_m, c1_m, d1_m; \
|
||||
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \
|
||||
sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \
|
||||
c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred,
|
||||
int32_t pred_stride,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred,
|
||||
int32_t pred_stride,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 vec;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
vec = __msa_fill_h(in_dc);
|
||||
vec = __msa_srari_h(vec, 3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff)
|
||||
{
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
|
||||
v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
|
||||
ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
|
||||
SRA_4V(vt0, vt1, vt2, vt3, 3);
|
||||
mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
|
||||
mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
|
||||
mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
|
||||
mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
|
||||
mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
|
||||
mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
|
||||
mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
|
||||
mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
|
||||
mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
|
||||
mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
|
||||
mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
|
||||
mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
|
||||
mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
|
||||
mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
|
||||
mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
|
||||
mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
|
||||
}
|
||||
|
||||
static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
|
||||
v16i8 dest0, dest1, dest2, dest3;
|
||||
v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
|
||||
v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
|
||||
v2i64 zero = { 0 };
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1);
|
||||
PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2);
|
||||
PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h);
|
||||
PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1);
|
||||
UNPCK_SH_SW(mul0, hz0_w, hz1_w);
|
||||
UNPCK_SH_SW(mul1, hz2_w, hz3_w);
|
||||
TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w);
|
||||
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
|
||||
int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
|
||||
v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
|
||||
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
|
||||
v16i8 zero = { 0 };
|
||||
|
||||
LD_SH4(input, 8, in0, in1, in2, in3);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1,
|
||||
mul0, mul1, mul2, mul3);
|
||||
PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2);
|
||||
PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
UNPCK_SH_SW(hz0, hz0r, hz0l);
|
||||
UNPCK_SH_SW(hz1, hz1r, hz1l);
|
||||
UNPCK_SH_SW(hz2, hz2r, hz2l);
|
||||
UNPCK_SH_SW(hz3, hz3r, hz3l);
|
||||
VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l);
|
||||
SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3);
|
||||
VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r);
|
||||
SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3);
|
||||
PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2,
|
||||
vt3);
|
||||
TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
|
||||
res2, res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
"sw $zero, 32(%[input]) \n\t"
|
||||
"sw $zero, 36(%[input]) \n\t"
|
||||
"sw $zero, 40(%[input]) \n\t"
|
||||
"sw $zero, 44(%[input]) \n\t"
|
||||
"sw $zero, 48(%[input]) \n\t"
|
||||
"sw $zero, 52(%[input]) \n\t"
|
||||
"sw $zero, 56(%[input]) \n\t"
|
||||
"sw $zero, 60(%[input]) \n\t"::
|
||||
|
||||
[input] "r"(input)
|
||||
);
|
||||
}
|
||||
|
||||
static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input_dc0, input_dc1, vec;
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v16i8 zero = { 0 };
|
||||
v8i16 res0, res1, res2, res3;
|
||||
|
||||
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
|
||||
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
|
||||
SRARI_H2_SH(input_dc0, input_dc1, 3);
|
||||
vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0);
|
||||
input[0] = 0;
|
||||
input[16] = 0;
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0,
|
||||
res1, res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
|
||||
res2, res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
void vp8_short_idct4x4llm_msa(int16_t *input, uint8_t *pred_ptr,
|
||||
int32_t pred_stride, uint8_t *dst_ptr,
|
||||
int32_t dst_stride)
|
||||
{
|
||||
idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_msa(int16_t input_dc, uint8_t *pred_ptr,
|
||||
int32_t pred_stride, uint8_t *dst_ptr,
|
||||
int32_t dst_stride)
|
||||
{
|
||||
idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC)
|
||||
{
|
||||
v8i16 dqc0, dqc1, q0, q1, dq0, dq1;
|
||||
|
||||
LD_SH2(DQC, 8, dqc0, dqc1);
|
||||
LD_SH2(d->qcoeff, 8, q0, q1);
|
||||
MUL2(dqc0, q0, dqc1, q1, dq0, dq1);
|
||||
ST_SH2(dq0, dq1, d->dqcoeff, 8);
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq,
|
||||
uint8_t *dest, int32_t stride)
|
||||
{
|
||||
dequant_idct4x4_addblk_msa(input, dq, dest, stride);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
|
||||
:
|
||||
: [input] "r" (input)
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq,
|
||||
uint8_t *dst, int32_t stride,
|
||||
char *eobs)
|
||||
{
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
uint8_t i;
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
if (eobs_h[0])
|
||||
{
|
||||
if (eobs_h[0] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (eobs_h[1])
|
||||
{
|
||||
if (eobs_h[1] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dst += (4 * stride);
|
||||
eobs_h += 2;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq,
|
||||
uint8_t *dstu, uint8_t *dstv,
|
||||
int32_t stride, char *eobs)
|
||||
{
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
|
||||
if (eobs_h[0])
|
||||
{
|
||||
if (eobs_h[0] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += (stride * 4);
|
||||
|
||||
if (eobs_h[1])
|
||||
{
|
||||
if (eobs_h[1] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (eobs_h[2])
|
||||
{
|
||||
if (eobs_h[2] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += (stride * 4);
|
||||
|
||||
if (eobs_h[3])
|
||||
{
|
||||
if (eobs_h[3] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,826 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
#define VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) \
|
||||
{ \
|
||||
v16u8 p1_a_sub_q1, p0_a_sub_q0; \
|
||||
\
|
||||
p0_a_sub_q0 = __msa_asub_u_b(p0, q0); \
|
||||
p1_a_sub_q1 = __msa_asub_u_b(p1, q1); \
|
||||
p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1); \
|
||||
p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0); \
|
||||
mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1); \
|
||||
mask = ((v16u8)mask <= b_limit); \
|
||||
}
|
||||
|
||||
#define VP8_LPF_FILTER4_4W(p1_in_out, p0_in_out, q0_in_out, q1_in_out, \
|
||||
mask_in, hev_in) \
|
||||
{ \
|
||||
v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
|
||||
v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
|
||||
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
|
||||
\
|
||||
p1_m = (v16i8)__msa_xori_b(p1_in_out, 0x80); \
|
||||
p0_m = (v16i8)__msa_xori_b(p0_in_out, 0x80); \
|
||||
q0_m = (v16i8)__msa_xori_b(q0_in_out, 0x80); \
|
||||
q1_m = (v16i8)__msa_xori_b(q1_in_out, 0x80); \
|
||||
\
|
||||
filt = __msa_subs_s_b(p1_m, q1_m); \
|
||||
\
|
||||
filt = filt & (v16i8)hev_in; \
|
||||
\
|
||||
q0_sub_p0 = q0_m - p0_m; \
|
||||
filt_sign = __msa_clti_s_b(filt, 0); \
|
||||
\
|
||||
cnst3h = __msa_ldi_h(3); \
|
||||
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
|
||||
q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
|
||||
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
|
||||
filt_r += q0_sub_p0_r; \
|
||||
filt_r = __msa_sat_s_h(filt_r, 7); \
|
||||
\
|
||||
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
|
||||
q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
|
||||
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
|
||||
filt_l += q0_sub_p0_l; \
|
||||
filt_l = __msa_sat_s_h(filt_l, 7); \
|
||||
\
|
||||
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
|
||||
filt = filt & (v16i8)mask_in; \
|
||||
\
|
||||
cnst4b = __msa_ldi_b(4); \
|
||||
filt1 = __msa_adds_s_b(filt, cnst4b); \
|
||||
filt1 >>= 3; \
|
||||
\
|
||||
cnst3b = __msa_ldi_b(3); \
|
||||
filt2 = __msa_adds_s_b(filt, cnst3b); \
|
||||
filt2 >>= 3; \
|
||||
\
|
||||
q0_m = __msa_subs_s_b(q0_m, filt1); \
|
||||
q0_in_out = __msa_xori_b((v16u8)q0_m, 0x80); \
|
||||
p0_m = __msa_adds_s_b(p0_m, filt2); \
|
||||
p0_in_out = __msa_xori_b((v16u8)p0_m, 0x80); \
|
||||
\
|
||||
filt = __msa_srari_b(filt1, 1); \
|
||||
hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
|
||||
filt = filt & (v16i8)hev_in; \
|
||||
\
|
||||
q1_m = __msa_subs_s_b(q1_m, filt); \
|
||||
q1_in_out = __msa_xori_b((v16u8)q1_m, 0x80); \
|
||||
p1_m = __msa_adds_s_b(p1_m, filt); \
|
||||
p1_in_out = __msa_xori_b((v16u8)p1_m, 0x80); \
|
||||
}
|
||||
|
||||
#define VP8_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) \
|
||||
{ \
|
||||
v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, q0_sub_p0_sign; \
|
||||
v16i8 filt, filt1, filt2, cnst4b, cnst3b, filt_sign; \
|
||||
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
|
||||
\
|
||||
p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
|
||||
p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
|
||||
q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
|
||||
q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
|
||||
\
|
||||
filt = __msa_subs_s_b(p1_m, q1_m); \
|
||||
\
|
||||
q0_sub_p0 = q0_m - p0_m; \
|
||||
filt_sign = __msa_clti_s_b(filt, 0); \
|
||||
\
|
||||
cnst3h = __msa_ldi_h(3); \
|
||||
q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
|
||||
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
|
||||
q0_sub_p0_r *= cnst3h; \
|
||||
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
|
||||
filt_r += q0_sub_p0_r; \
|
||||
filt_r = __msa_sat_s_h(filt_r, 7); \
|
||||
\
|
||||
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
|
||||
q0_sub_p0_l *= cnst3h; \
|
||||
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
|
||||
filt_l += q0_sub_p0_l; \
|
||||
filt_l = __msa_sat_s_h(filt_l, 7); \
|
||||
\
|
||||
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
|
||||
filt = filt & (v16i8)(mask); \
|
||||
\
|
||||
cnst4b = __msa_ldi_b(4); \
|
||||
filt1 = __msa_adds_s_b(filt, cnst4b); \
|
||||
filt1 >>= 3; \
|
||||
\
|
||||
cnst3b = __msa_ldi_b(3); \
|
||||
filt2 = __msa_adds_s_b(filt, cnst3b); \
|
||||
filt2 >>= 3; \
|
||||
\
|
||||
q0_m = __msa_subs_s_b(q0_m, filt1); \
|
||||
p0_m = __msa_adds_s_b(p0_m, filt2); \
|
||||
q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \
|
||||
p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \
|
||||
}
|
||||
|
||||
#define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
|
||||
{ \
|
||||
v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
|
||||
v16i8 filt, q0_sub_p0, cnst4b, cnst3b; \
|
||||
v16i8 u, filt1, filt2, filt_sign, q0_sub_p0_sign; \
|
||||
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_r, u_r, u_l, filt_l; \
|
||||
v8i16 cnst3h, cnst27h, cnst18h, cnst63h; \
|
||||
\
|
||||
cnst3h = __msa_ldi_h(3); \
|
||||
\
|
||||
p2_m = (v16i8)__msa_xori_b(p2, 0x80); \
|
||||
p1_m = (v16i8)__msa_xori_b(p1, 0x80); \
|
||||
p0_m = (v16i8)__msa_xori_b(p0, 0x80); \
|
||||
q0_m = (v16i8)__msa_xori_b(q0, 0x80); \
|
||||
q1_m = (v16i8)__msa_xori_b(q1, 0x80); \
|
||||
q2_m = (v16i8)__msa_xori_b(q2, 0x80); \
|
||||
\
|
||||
filt = __msa_subs_s_b(p1_m, q1_m); \
|
||||
q0_sub_p0 = q0_m - p0_m; \
|
||||
q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
|
||||
filt_sign = __msa_clti_s_b(filt, 0); \
|
||||
\
|
||||
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
|
||||
q0_sub_p0_r *= cnst3h; \
|
||||
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
|
||||
filt_r = filt_r + q0_sub_p0_r; \
|
||||
filt_r = __msa_sat_s_h(filt_r, 7); \
|
||||
\
|
||||
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
|
||||
q0_sub_p0_l *= cnst3h; \
|
||||
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
|
||||
filt_l = filt_l + q0_sub_p0_l; \
|
||||
filt_l = __msa_sat_s_h(filt_l, 7); \
|
||||
\
|
||||
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
|
||||
filt = filt & (v16i8)mask; \
|
||||
filt2 = filt & (v16i8)hev; \
|
||||
\
|
||||
hev = __msa_xori_b(hev, 0xff); \
|
||||
filt = filt & (v16i8)hev; \
|
||||
cnst4b = __msa_ldi_b(4); \
|
||||
filt1 = __msa_adds_s_b(filt2, cnst4b); \
|
||||
filt1 >>= 3; \
|
||||
cnst3b = __msa_ldi_b(3); \
|
||||
filt2 = __msa_adds_s_b(filt2, cnst3b); \
|
||||
filt2 >>= 3; \
|
||||
q0_m = __msa_subs_s_b(q0_m, filt1); \
|
||||
p0_m = __msa_adds_s_b(p0_m, filt2); \
|
||||
\
|
||||
filt_sign = __msa_clti_s_b(filt, 0); \
|
||||
ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
|
||||
\
|
||||
cnst27h = __msa_ldi_h(27); \
|
||||
cnst63h = __msa_ldi_h(63); \
|
||||
\
|
||||
u_r = filt_r * cnst27h; \
|
||||
u_r += cnst63h; \
|
||||
u_r >>= 7; \
|
||||
u_r = __msa_sat_s_h(u_r, 7); \
|
||||
u_l = filt_l * cnst27h; \
|
||||
u_l += cnst63h; \
|
||||
u_l >>= 7; \
|
||||
u_l = __msa_sat_s_h(u_l, 7); \
|
||||
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
|
||||
q0_m = __msa_subs_s_b(q0_m, u); \
|
||||
q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
|
||||
p0_m = __msa_adds_s_b(p0_m, u); \
|
||||
p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
|
||||
cnst18h = __msa_ldi_h(18); \
|
||||
u_r = filt_r * cnst18h; \
|
||||
u_r += cnst63h; \
|
||||
u_r >>= 7; \
|
||||
u_r = __msa_sat_s_h(u_r, 7); \
|
||||
\
|
||||
u_l = filt_l * cnst18h; \
|
||||
u_l += cnst63h; \
|
||||
u_l >>= 7; \
|
||||
u_l = __msa_sat_s_h(u_l, 7); \
|
||||
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
|
||||
q1_m = __msa_subs_s_b(q1_m, u); \
|
||||
q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
|
||||
p1_m = __msa_adds_s_b(p1_m, u); \
|
||||
p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
|
||||
u_r = filt_r << 3; \
|
||||
u_r += filt_r + cnst63h; \
|
||||
u_r >>= 7; \
|
||||
u_r = __msa_sat_s_h(u_r, 7); \
|
||||
\
|
||||
u_l = filt_l << 3; \
|
||||
u_l += filt_l + cnst63h; \
|
||||
u_l >>= 7; \
|
||||
u_l = __msa_sat_s_h(u_l, 7); \
|
||||
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
|
||||
q2_m = __msa_subs_s_b(q2_m, u); \
|
||||
q2 = __msa_xori_b((v16u8)q2_m, 0x80); \
|
||||
p2_m = __msa_adds_s_b(p2_m, u); \
|
||||
p2 = __msa_xori_b((v16u8)p2_m, 0x80); \
|
||||
}
|
||||
|
||||
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, \
|
||||
q0_in, q1_in, q2_in, q3_in, \
|
||||
limit_in, b_limit_in, thresh_in, \
|
||||
hev_out, mask_out, flat_out) \
|
||||
{ \
|
||||
v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
|
||||
v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
|
||||
\
|
||||
p3_asub_p2_m = __msa_asub_u_b((p3_in), (p2_in)); \
|
||||
p2_asub_p1_m = __msa_asub_u_b((p2_in), (p1_in)); \
|
||||
p1_asub_p0_m = __msa_asub_u_b((p1_in), (p0_in)); \
|
||||
q1_asub_q0_m = __msa_asub_u_b((q1_in), (q0_in)); \
|
||||
q2_asub_q1_m = __msa_asub_u_b((q2_in), (q1_in)); \
|
||||
q3_asub_q2_m = __msa_asub_u_b((q3_in), (q2_in)); \
|
||||
p0_asub_q0_m = __msa_asub_u_b((p0_in), (q0_in)); \
|
||||
p1_asub_q1_m = __msa_asub_u_b((p1_in), (q1_in)); \
|
||||
flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
|
||||
hev_out = (thresh_in) < (v16u8)flat_out; \
|
||||
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
|
||||
p1_asub_q1_m >>= 1; \
|
||||
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
|
||||
mask_out = (b_limit_in) < p0_asub_q0_m; \
|
||||
mask_out = __msa_max_u_b(flat_out, mask_out); \
|
||||
p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
|
||||
mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
|
||||
q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
|
||||
mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
|
||||
mask_out = (limit_in) < (v16u8)mask_out; \
|
||||
mask_out = __msa_xori_b(mask_out, 0xff); \
|
||||
}
|
||||
|
||||
#define VP8_ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) \
|
||||
{ \
|
||||
uint16_t tmp0_h; \
|
||||
uint32_t tmp0_w; \
|
||||
\
|
||||
tmp0_w = __msa_copy_u_w((v4i32)in0, in0_idx); \
|
||||
tmp0_h = __msa_copy_u_h((v8i16)in1, in1_idx); \
|
||||
SW(tmp0_w, pdst); \
|
||||
SH(tmp0_h, pdst + stride); \
|
||||
}
|
||||
|
||||
|
||||
static void loop_filter_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit0_ptr,
|
||||
const uint8_t *limit0_ptr,
|
||||
const uint8_t *thresh0_ptr,
|
||||
const uint8_t *b_limit1_ptr,
|
||||
const uint8_t *limit1_ptr,
|
||||
const uint8_t *thresh1_ptr)
|
||||
{
|
||||
v16u8 mask, hev, flat;
|
||||
v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
|
||||
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
|
||||
thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
|
||||
thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
|
||||
|
||||
b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
|
||||
b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
|
||||
b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
|
||||
|
||||
limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
|
||||
limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
|
||||
limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
|
||||
hev, mask, flat);
|
||||
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
|
||||
|
||||
ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
|
||||
}
|
||||
|
||||
static void loop_filter_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit0_ptr,
|
||||
const uint8_t *limit0_ptr,
|
||||
const uint8_t *thresh0_ptr,
|
||||
const uint8_t *b_limit1_ptr,
|
||||
const uint8_t *limit1_ptr,
|
||||
const uint8_t *thresh1_ptr)
|
||||
{
|
||||
v16u8 mask, hev, flat;
|
||||
v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
|
||||
v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
|
||||
LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
LD_UB8(src - 4 + (8 * pitch), pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
|
||||
thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
|
||||
thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
|
||||
thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
|
||||
|
||||
b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
|
||||
b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
|
||||
b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
|
||||
|
||||
limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
|
||||
limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
|
||||
limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
|
||||
hev, mask, flat);
|
||||
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
|
||||
ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
|
||||
ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
|
||||
|
||||
src -= 2;
|
||||
ST4x8_UB(tmp2, tmp3, src, pitch);
|
||||
src += (8 * pitch);
|
||||
ST4x8_UB(tmp4, tmp5, src, pitch);
|
||||
}
|
||||
|
||||
static void mbloop_filter_horizontal_edge_y_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint8_t *temp_src;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
temp_src = src - (pitch << 2);
|
||||
LD_UB8(temp_src, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
|
||||
temp_src = src - 3 * pitch;
|
||||
ST_UB4(p2, p1, p0, q0, temp_src, pitch);
|
||||
temp_src += (4 * pitch);
|
||||
ST_UB2(q1, q2, temp_src, pitch);
|
||||
}
|
||||
|
||||
static void mbloop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint8_t *temp_src;
|
||||
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
|
||||
v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
|
||||
temp_src = src_u - (pitch << 2);
|
||||
LD_UB8(temp_src, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
|
||||
temp_src = src_v - (pitch << 2);
|
||||
LD_UB8(temp_src, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
|
||||
|
||||
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
|
||||
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
|
||||
|
||||
p2_d = __msa_copy_u_d((v2i64)p2, 0);
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 0);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 0);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 0);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 0);
|
||||
q2_d = __msa_copy_u_d((v2i64)q2, 0);
|
||||
src_u -= (pitch * 3);
|
||||
SD4(p2_d, p1_d, p0_d, q0_d, src_u, pitch);
|
||||
src_u += 4 * pitch;
|
||||
SD(q1_d, src_u);
|
||||
src_u += pitch;
|
||||
SD(q2_d, src_u);
|
||||
|
||||
p2_d = __msa_copy_u_d((v2i64)p2, 1);
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 1);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 1);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 1);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 1);
|
||||
q2_d = __msa_copy_u_d((v2i64)q2, 1);
|
||||
src_v -= (pitch * 3);
|
||||
SD4(p2_d, p1_d, p0_d, q0_d, src_v, pitch);
|
||||
src_v += 4 * pitch;
|
||||
SD(q1_d, src_v);
|
||||
src_v += pitch;
|
||||
SD(q2_d, src_v);
|
||||
}
|
||||
|
||||
static void mbloop_filter_vertical_edge_y_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint8_t *temp_src;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
temp_src = src - 4;
|
||||
LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
temp_src += (8 * pitch);
|
||||
LD_UB8(temp_src, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
|
||||
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
|
||||
ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7);
|
||||
ILVRL_B2_SH(q2, q1, tmp2, tmp5);
|
||||
|
||||
temp_src = src - 3;
|
||||
VP8_ST6x1_UB(tmp3, 0, tmp2, 0, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 1, tmp2, 1, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 2, tmp2, 2, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 3, tmp2, 3, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 0, tmp2, 4, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 1, tmp2, 5, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 2, tmp2, 6, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 3, tmp2, 7, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 0, tmp5, 0, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 1, tmp5, 1, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 2, tmp5, 2, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 3, tmp5, 3, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 0, tmp5, 4, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 1, tmp5, 5, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 2, tmp5, 6, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 3, tmp5, 7, temp_src, 4);
|
||||
}
|
||||
|
||||
static void mbloop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
|
||||
LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
LD_UB8(src_v - 4, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
|
||||
|
||||
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
|
||||
ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7);
|
||||
ILVRL_B2_SH(q2, q1, tmp2, tmp5);
|
||||
|
||||
src_u -= 3;
|
||||
VP8_ST6x1_UB(tmp3, 0, tmp2, 0, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 1, tmp2, 1, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 2, tmp2, 2, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 3, tmp2, 3, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 0, tmp2, 4, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 1, tmp2, 5, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 2, tmp2, 6, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 3, tmp2, 7, src_u, 4);
|
||||
|
||||
src_v -= 3;
|
||||
VP8_ST6x1_UB(tmp6, 0, tmp5, 0, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 1, tmp5, 1, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 2, tmp5, 2, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 3, tmp5, 3, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 0, tmp5, 4, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 1, tmp5, 5, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 2, tmp5, 6, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 3, tmp5, 7, src_v, 4);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
v16u8 p1, p0, q1, q0;
|
||||
v16u8 mask, b_limit;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
|
||||
LD_UB4(src - (pitch << 1), pitch, p1, p0, q0, q1);
|
||||
VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
|
||||
VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
|
||||
ST_UB2(p0, q0, (src - pitch), pitch);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
uint8_t *temp_src;
|
||||
v16u8 p1, p0, q1, q0;
|
||||
v16u8 mask, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v8i16 tmp0, tmp1;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
|
||||
temp_src = src - 2;
|
||||
LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
temp_src += (8 * pitch);
|
||||
LD_UB8(temp_src, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p1, p0, q0, q1);
|
||||
VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
|
||||
VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
|
||||
ILVRL_B2_SH(q0, p0, tmp1, tmp0);
|
||||
|
||||
src -= 1;
|
||||
ST2x4_UB(tmp1, 0, src, pitch);
|
||||
src += 4 * pitch;
|
||||
ST2x4_UB(tmp1, 4, src, pitch);
|
||||
src += 4 * pitch;
|
||||
ST2x4_UB(tmp0, 0, src, pitch);
|
||||
src += 4 * pitch;
|
||||
ST2x4_UB(tmp0, 4, src, pitch);
|
||||
src += 4 * pitch;
|
||||
}
|
||||
|
||||
static void loop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint64_t p1_d, p0_d, q0_d, q1_d;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
|
||||
v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
|
||||
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
|
||||
src_u = src_u - (pitch << 2);
|
||||
LD_UB8(src_u, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
|
||||
src_u += (5 * pitch);
|
||||
src_v = src_v - (pitch << 2);
|
||||
LD_UB8(src_v, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
|
||||
src_v += (5 * pitch);
|
||||
|
||||
/* right 8 element of p3 are u pixel and
|
||||
left 8 element of p3 are v pixel */
|
||||
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
|
||||
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
|
||||
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 0);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 0);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 0);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 0);
|
||||
SD4(q1_d, q0_d, p0_d, p1_d, src_u, (- pitch));
|
||||
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 1);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 1);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 1);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 1);
|
||||
SD4(q1_d, q0_d, p0_d, p1_d, src_v, (- pitch));
|
||||
}
|
||||
|
||||
static void loop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint8_t *temp_src_u, *temp_src_v;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
|
||||
LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
LD_UB8(src_v - 4, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
|
||||
ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
|
||||
ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
|
||||
tmp0 = (v4i32)__msa_ilvl_b((v16i8)p0, (v16i8)p1);
|
||||
tmp1 = (v4i32)__msa_ilvl_b((v16i8)q1, (v16i8)q0);
|
||||
ILVRL_H2_SW(tmp1, tmp0, tmp4, tmp5);
|
||||
|
||||
temp_src_u = src_u - 2;
|
||||
ST4x4_UB(tmp2, tmp2, 0, 1, 2, 3, temp_src_u, pitch);
|
||||
temp_src_u += 4 * pitch;
|
||||
ST4x4_UB(tmp3, tmp3, 0, 1, 2, 3, temp_src_u, pitch);
|
||||
|
||||
temp_src_v = src_v - 2;
|
||||
ST4x4_UB(tmp4, tmp4, 0, 1, 2, 3, temp_src_v, pitch);
|
||||
temp_src_v += 4 * pitch;
|
||||
ST4x4_UB(tmp5, tmp5, 0, 1, 2, 3, temp_src_v, pitch);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbh_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
mbloop_filter_horizontal_edge_y_msa(src_y, pitch_y,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
mbloop_filter_horizontal_edge_uv_msa(src_u, src_v, pitch_u_v,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbv_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
mbloop_filter_vertical_edge_y_msa(src_y, pitch_y,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
mbloop_filter_vertical_edge_uv_msa(src_u, src_v, pitch_u_v,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bh_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
loop_filter_horizontal_4_dual_msa(src_y + 4 * pitch_y, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_horizontal_4_dual_msa(src_y + 8 * pitch_y, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_horizontal_4_dual_msa(src_y + 12 * pitch_y, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
loop_filter_horizontal_edge_uv_msa(src_u + (4 * pitch_u_v),
|
||||
src_v + (4 * pitch_u_v),
|
||||
pitch_u_v,
|
||||
*lpf_info_ptr->blim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bv_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
loop_filter_vertical_4_dual_msa(src_y + 4, pitch_y, lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_vertical_4_dual_msa(src_y + 8, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_vertical_4_dual_msa(src_y + 12, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
loop_filter_vertical_edge_uv_msa(src_u + 4, src_v + 4, pitch_u_v,
|
||||
*lpf_info_ptr->blim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_msa(uint8_t *src_y, int32_t pitch_y,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (4 * pitch_y),
|
||||
pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (8 * pitch_y),
|
||||
pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (12 * pitch_y),
|
||||
pitch_y, b_limit_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_msa(uint8_t *src_y, int32_t pitch_y,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_msa(src_y + 4, pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_vertical_edge_msa(src_y + 8, pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_vertical_edge_msa(src_y + 12, pitch_y, b_limit_ptr);
|
||||
}
|
|
@ -1,146 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/postproc.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static void filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
uint64_t src0_d, src1_d, dst0_d, dst1_d;
|
||||
v16i8 src0 = { 0 };
|
||||
v16i8 src1 = { 0 };
|
||||
v16i8 dst0 = { 0 };
|
||||
v16i8 dst1 = { 0 };
|
||||
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 2; row--;)
|
||||
{
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src0);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst0);
|
||||
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src1);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst1);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst0, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst1, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
v16i8 src0, src1, src2, src3;
|
||||
v16i8 dst0, dst1, dst2, dst3;
|
||||
v8i16 src_wt, dst_wt;
|
||||
v8i16 res_h_r, res_h_l;
|
||||
v8i16 src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 4; row--;)
|
||||
{
|
||||
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
|
||||
src_ptr += (4 * src_stride);
|
||||
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src2, src_r, src_l);
|
||||
UNPCK_UB_SH(dst2, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src3, src_r, src_l);
|
||||
UNPCK_UB_SH(dst3, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||
src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||
src_weight);
|
||||
}
|
|
@ -1,851 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static const int16_t vp8_rv_msa[] =
|
||||
{
|
||||
8, 5, 2, 2, 8, 12, 4, 9, 8, 3,
|
||||
0, 3, 9, 0, 0, 0, 8, 3, 14, 4,
|
||||
10, 1, 11, 14, 1, 14, 9, 6, 12, 11,
|
||||
8, 6, 10, 0, 0, 8, 9, 0, 3, 14,
|
||||
8, 11, 13, 4, 2, 9, 0, 3, 9, 6,
|
||||
1, 2, 3, 14, 13, 1, 8, 2, 9, 7,
|
||||
3, 3, 1, 13, 13, 6, 6, 5, 2, 7,
|
||||
11, 9, 11, 8, 7, 3, 2, 0, 13, 13,
|
||||
14, 4, 12, 5, 12, 10, 8, 10, 13, 10,
|
||||
4, 14, 4, 10, 0, 8, 11, 1, 13, 7,
|
||||
7, 14, 6, 14, 13, 2, 13, 5, 4, 4,
|
||||
0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
|
||||
8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
|
||||
3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
|
||||
3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
|
||||
13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
|
||||
5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
|
||||
9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
|
||||
4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
|
||||
3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
|
||||
11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
|
||||
5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
|
||||
0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
|
||||
10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
|
||||
4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
|
||||
0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
|
||||
8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
|
||||
3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
|
||||
3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
|
||||
13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
|
||||
5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
|
||||
9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
|
||||
4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
|
||||
3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
|
||||
11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
|
||||
5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
|
||||
0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
|
||||
10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
|
||||
4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
|
||||
3, 8, 3, 7, 8, 5, 11, 4, 12, 3,
|
||||
11, 9, 14, 8, 14, 13, 4, 3, 1, 2,
|
||||
14, 6, 5, 4, 4, 11, 4, 6, 2, 1,
|
||||
5, 8, 8, 12, 13, 5, 14, 10, 12, 13,
|
||||
0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
|
||||
};
|
||||
|
||||
#define VP8_TRANSPOSE8x16_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
|
||||
out0, out1, out2, out3, \
|
||||
out4, out5, out6, out7, \
|
||||
out8, out9, out10, out11, \
|
||||
out12, out13, out14, out15) \
|
||||
{ \
|
||||
v8i16 temp0, temp1, temp2, temp3, temp4; \
|
||||
v8i16 temp5, temp6, temp7, temp8, temp9; \
|
||||
\
|
||||
ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, \
|
||||
temp0, temp1, temp2, temp3); \
|
||||
ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
|
||||
ILVRL_W2_SH(temp5, temp4, temp6, temp7); \
|
||||
ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
|
||||
ILVRL_W2_SH(temp5, temp4, temp8, temp9); \
|
||||
ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, \
|
||||
temp0, temp1, temp2, temp3); \
|
||||
ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
|
||||
ILVRL_W2_UB(temp5, temp4, out8, out10); \
|
||||
ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
|
||||
ILVRL_W2_UB(temp5, temp4, out12, out14); \
|
||||
out0 = (v16u8)temp6; \
|
||||
out2 = (v16u8)temp7; \
|
||||
out4 = (v16u8)temp8; \
|
||||
out6 = (v16u8)temp9; \
|
||||
out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \
|
||||
out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \
|
||||
out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \
|
||||
out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \
|
||||
out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
|
||||
out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
|
||||
out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \
|
||||
out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \
|
||||
}
|
||||
|
||||
#define VP8_AVER_IF_RETAIN(above2_in, above1_in, src_in, \
|
||||
below1_in, below2_in, ref, out) \
|
||||
{ \
|
||||
v16u8 temp0, temp1; \
|
||||
\
|
||||
temp1 = __msa_aver_u_b(above2_in, above1_in); \
|
||||
temp0 = __msa_aver_u_b(below2_in, below1_in); \
|
||||
temp1 = __msa_aver_u_b(temp1, temp0); \
|
||||
out = __msa_aver_u_b(src_in, temp1); \
|
||||
temp0 = __msa_asub_u_b(src_in, above2_in); \
|
||||
temp1 = __msa_asub_u_b(src_in, above1_in); \
|
||||
temp0 = (temp0 < ref); \
|
||||
temp1 = (temp1 < ref); \
|
||||
temp0 = temp0 & temp1; \
|
||||
temp1 = __msa_asub_u_b(src_in, below1_in); \
|
||||
temp1 = (temp1 < ref); \
|
||||
temp0 = temp0 & temp1; \
|
||||
temp1 = __msa_asub_u_b(src_in, below2_in); \
|
||||
temp1 = (temp1 < ref); \
|
||||
temp0 = temp0 & temp1; \
|
||||
out = __msa_bmz_v(out, src_in, temp0); \
|
||||
}
|
||||
|
||||
#define TRANSPOSE12x16_B(in0, in1, in2, in3, in4, in5, in6, in7, \
|
||||
in8, in9, in10, in11, in12, in13, in14, in15) \
|
||||
{ \
|
||||
v8i16 temp0, temp1, temp2, temp3, temp4; \
|
||||
v8i16 temp5, temp6, temp7, temp8, temp9; \
|
||||
\
|
||||
ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1); \
|
||||
ILVRL_H2_SH(temp1, temp0, temp2, temp3); \
|
||||
ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1); \
|
||||
ILVRL_H2_SH(temp1, temp0, temp4, temp5); \
|
||||
ILVRL_W2_SH(temp4, temp2, temp0, temp1); \
|
||||
ILVRL_W2_SH(temp5, temp3, temp2, temp3); \
|
||||
ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5); \
|
||||
ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5); \
|
||||
ILVRL_H2_SH(temp5, temp4, temp6, temp7); \
|
||||
ILVR_B2_SH(in13, in12, in15, in14, temp4, temp5); \
|
||||
ILVRL_H2_SH(temp5, temp4, temp8, temp9); \
|
||||
ILVRL_W2_SH(temp8, temp6, temp4, temp5); \
|
||||
ILVRL_W2_SH(temp9, temp7, temp6, temp7); \
|
||||
ILVL_B2_SH(in1, in0, in3, in2, temp8, temp9); \
|
||||
ILVR_D2_UB(temp4, temp0, temp5, temp1, in0, in2); \
|
||||
in1 = (v16u8)__msa_ilvl_d((v2i64)temp4, (v2i64)temp0); \
|
||||
in3 = (v16u8)__msa_ilvl_d((v2i64)temp5, (v2i64)temp1); \
|
||||
ILVL_B2_SH(in5, in4, in7, in6, temp0, temp1); \
|
||||
ILVR_D2_UB(temp6, temp2, temp7, temp3, in4, in6); \
|
||||
in5 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp2); \
|
||||
in7 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp3); \
|
||||
ILVL_B4_SH(in9, in8, in11, in10, in13, in12, in15, in14, \
|
||||
temp2, temp3, temp4, temp5); \
|
||||
ILVR_H4_SH(temp9, temp8, temp1, temp0, temp3, temp2, temp5, temp4, \
|
||||
temp6, temp7, temp8, temp9); \
|
||||
ILVR_W2_SH(temp7, temp6, temp9, temp8, temp0, temp1); \
|
||||
in8 = (v16u8)__msa_ilvr_d((v2i64)temp1, (v2i64)temp0); \
|
||||
in9 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp0); \
|
||||
ILVL_W2_SH(temp7, temp6, temp9, temp8, temp2, temp3); \
|
||||
in10 = (v16u8)__msa_ilvr_d((v2i64)temp3, (v2i64)temp2); \
|
||||
in11 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp2); \
|
||||
}
|
||||
|
||||
#define VP8_TRANSPOSE12x8_UB_UB(in0, in1, in2, in3, in4, in5, \
|
||||
in6, in7, in8, in9, in10, in11) \
|
||||
{ \
|
||||
v8i16 temp0, temp1, temp2, temp3; \
|
||||
v8i16 temp4, temp5, temp6, temp7; \
|
||||
\
|
||||
ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1); \
|
||||
ILVRL_H2_SH(temp1, temp0, temp2, temp3); \
|
||||
ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1); \
|
||||
ILVRL_H2_SH(temp1, temp0, temp4, temp5); \
|
||||
ILVRL_W2_SH(temp4, temp2, temp0, temp1); \
|
||||
ILVRL_W2_SH(temp5, temp3, temp2, temp3); \
|
||||
ILVL_B2_SH(in1, in0, in3, in2, temp4, temp5); \
|
||||
temp4 = __msa_ilvr_h(temp5, temp4); \
|
||||
ILVL_B2_SH(in5, in4, in7, in6, temp6, temp7); \
|
||||
temp5 = __msa_ilvr_h(temp7, temp6); \
|
||||
ILVRL_W2_SH(temp5, temp4, temp6, temp7); \
|
||||
in0 = (v16u8)temp0; \
|
||||
in2 = (v16u8)temp1; \
|
||||
in4 = (v16u8)temp2; \
|
||||
in6 = (v16u8)temp3; \
|
||||
in8 = (v16u8)temp6; \
|
||||
in10 = (v16u8)temp7; \
|
||||
in1 = (v16u8)__msa_ilvl_d((v2i64)temp0, (v2i64)temp0); \
|
||||
in3 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp1); \
|
||||
in5 = (v16u8)__msa_ilvl_d((v2i64)temp2, (v2i64)temp2); \
|
||||
in7 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp3); \
|
||||
in9 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp6); \
|
||||
in11 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp7); \
|
||||
}
|
||||
|
||||
static void postproc_down_across_chroma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
|
||||
int32_t src_stride,
|
||||
int32_t dst_stride,
|
||||
int32_t cols, uint8_t *f)
|
||||
{
|
||||
uint8_t *p_src = src_ptr;
|
||||
uint8_t *p_dst = dst_ptr;
|
||||
uint8_t *f_orig = f;
|
||||
uint8_t *p_dst_st = dst_ptr;
|
||||
uint16_t col;
|
||||
uint64_t out0, out1, out2, out3;
|
||||
v16u8 above2, above1, below2, below1, src, ref, ref_temp;
|
||||
v16u8 inter0, inter1, inter2, inter3, inter4, inter5;
|
||||
v16u8 inter6, inter7, inter8, inter9, inter10, inter11;
|
||||
|
||||
for (col = (cols / 16); col--;)
|
||||
{
|
||||
ref = LD_UB(f);
|
||||
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
|
||||
src = LD_UB(p_src);
|
||||
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
|
||||
above2 = LD_UB(p_src + 3 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
|
||||
above1 = LD_UB(p_src + 4 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
|
||||
src = LD_UB(p_src + 5 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
|
||||
below1 = LD_UB(p_src + 6 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
|
||||
below2 = LD_UB(p_src + 7 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
|
||||
above2 = LD_UB(p_src + 8 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
|
||||
above1 = LD_UB(p_src + 9 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
|
||||
ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
|
||||
p_dst, dst_stride);
|
||||
|
||||
p_dst += 16;
|
||||
p_src += 16;
|
||||
f += 16;
|
||||
}
|
||||
|
||||
if (0 != (cols / 16))
|
||||
{
|
||||
ref = LD_UB(f);
|
||||
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
|
||||
src = LD_UB(p_src);
|
||||
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
|
||||
above2 = LD_UB(p_src + 3 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
|
||||
above1 = LD_UB(p_src + 4 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
|
||||
src = LD_UB(p_src + 5 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
|
||||
below1 = LD_UB(p_src + 6 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
|
||||
below2 = LD_UB(p_src + 7 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
|
||||
above2 = LD_UB(p_src + 8 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
|
||||
above1 = LD_UB(p_src + 9 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
|
||||
out0 = __msa_copy_u_d((v2i64)inter0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter1, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter2, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter3, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst, dst_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)inter4, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter5, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter6, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter7, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride);
|
||||
}
|
||||
|
||||
f = f_orig;
|
||||
p_dst = dst_ptr - 2;
|
||||
LD_UB8(p_dst, dst_stride,
|
||||
inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7);
|
||||
|
||||
for (col = 0; col < (cols / 8); ++col)
|
||||
{
|
||||
ref = LD_UB(f);
|
||||
f += 8;
|
||||
VP8_TRANSPOSE12x8_UB_UB(inter0, inter1, inter2, inter3,
|
||||
inter4, inter5, inter6, inter7,
|
||||
inter8, inter9, inter10, inter11);
|
||||
if (0 == col)
|
||||
{
|
||||
above2 = inter2;
|
||||
above1 = inter2;
|
||||
}
|
||||
else
|
||||
{
|
||||
above2 = inter0;
|
||||
above1 = inter1;
|
||||
}
|
||||
src = inter2;
|
||||
below1 = inter3;
|
||||
below2 = inter4;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
|
||||
ref_temp, inter2);
|
||||
above2 = inter5;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
|
||||
ref_temp, inter3);
|
||||
above1 = inter6;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
|
||||
ref_temp, inter4);
|
||||
src = inter7;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
|
||||
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src,
|
||||
ref_temp, inter5);
|
||||
below1 = inter8;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
|
||||
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1,
|
||||
ref_temp, inter6);
|
||||
below2 = inter9;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
|
||||
ref_temp, inter7);
|
||||
if (col == (cols / 8 - 1))
|
||||
{
|
||||
above2 = inter9;
|
||||
}
|
||||
else
|
||||
{
|
||||
above2 = inter10;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
|
||||
ref_temp, inter8);
|
||||
if (col == (cols / 8 - 1))
|
||||
{
|
||||
above1 = inter9;
|
||||
}
|
||||
else
|
||||
{
|
||||
above1 = inter11;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
|
||||
ref_temp, inter9);
|
||||
TRANSPOSE8x8_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7,
|
||||
inter8, inter9, inter2, inter3, inter4, inter5,
|
||||
inter6, inter7, inter8, inter9);
|
||||
p_dst += 8;
|
||||
LD_UB2(p_dst, dst_stride, inter0, inter1);
|
||||
ST8x1_UB(inter2, p_dst_st);
|
||||
ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
|
||||
LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
|
||||
ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
|
||||
ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
|
||||
LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
|
||||
ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
|
||||
ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
|
||||
LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
|
||||
ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
|
||||
ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
|
||||
p_dst_st += 8;
|
||||
}
|
||||
}
|
||||
|
||||
static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
|
||||
int32_t src_stride,
|
||||
int32_t dst_stride,
|
||||
int32_t cols, uint8_t *f)
|
||||
{
|
||||
uint8_t *p_src = src_ptr;
|
||||
uint8_t *p_dst = dst_ptr;
|
||||
uint8_t *p_dst_st = dst_ptr;
|
||||
uint8_t *f_orig = f;
|
||||
uint16_t col;
|
||||
v16u8 above2, above1, below2, below1;
|
||||
v16u8 src, ref, ref_temp;
|
||||
v16u8 inter0, inter1, inter2, inter3, inter4, inter5, inter6;
|
||||
v16u8 inter7, inter8, inter9, inter10, inter11;
|
||||
v16u8 inter12, inter13, inter14, inter15;
|
||||
|
||||
for (col = (cols / 16); col--;)
|
||||
{
|
||||
ref = LD_UB(f);
|
||||
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
|
||||
src = LD_UB(p_src);
|
||||
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
|
||||
above2 = LD_UB(p_src + 3 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
|
||||
above1 = LD_UB(p_src + 4 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
|
||||
src = LD_UB(p_src + 5 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
|
||||
below1 = LD_UB(p_src + 6 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
|
||||
below2 = LD_UB(p_src + 7 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
|
||||
above2 = LD_UB(p_src + 8 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
|
||||
above1 = LD_UB(p_src + 9 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
|
||||
src = LD_UB(p_src + 10 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8);
|
||||
below1 = LD_UB(p_src + 11 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9);
|
||||
below2 = LD_UB(p_src + 12 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10);
|
||||
above2 = LD_UB(p_src + 13 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11);
|
||||
above1 = LD_UB(p_src + 14 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12);
|
||||
src = LD_UB(p_src + 15 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13);
|
||||
below1 = LD_UB(p_src + 16 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14);
|
||||
below2 = LD_UB(p_src + 17 * src_stride);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15);
|
||||
ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
|
||||
p_dst, dst_stride);
|
||||
ST_UB8(inter8, inter9, inter10, inter11, inter12, inter13,
|
||||
inter14, inter15, p_dst + 8 * dst_stride, dst_stride);
|
||||
p_src += 16;
|
||||
p_dst += 16;
|
||||
f += 16;
|
||||
}
|
||||
|
||||
f = f_orig;
|
||||
p_dst = dst_ptr - 2;
|
||||
LD_UB8(p_dst, dst_stride,
|
||||
inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7);
|
||||
LD_UB8(p_dst + 8 * dst_stride, dst_stride,
|
||||
inter8, inter9, inter10, inter11, inter12, inter13,
|
||||
inter14, inter15);
|
||||
|
||||
for (col = 0; col < cols / 8; ++col)
|
||||
{
|
||||
ref = LD_UB(f);
|
||||
f += 8;
|
||||
TRANSPOSE12x16_B(inter0, inter1, inter2, inter3, inter4, inter5,
|
||||
inter6, inter7, inter8, inter9, inter10, inter11,
|
||||
inter12, inter13, inter14, inter15);
|
||||
if (0 == col)
|
||||
{
|
||||
above2 = inter2;
|
||||
above1 = inter2;
|
||||
}
|
||||
else
|
||||
{
|
||||
above2 = inter0;
|
||||
above1 = inter1;
|
||||
}
|
||||
|
||||
src = inter2;
|
||||
below1 = inter3;
|
||||
below2 = inter4;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
|
||||
ref_temp, inter2);
|
||||
above2 = inter5;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
|
||||
ref_temp, inter3);
|
||||
above1 = inter6;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
|
||||
ref_temp, inter4);
|
||||
src = inter7;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
|
||||
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src,
|
||||
ref_temp, inter5);
|
||||
below1 = inter8;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
|
||||
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1,
|
||||
ref_temp, inter6);
|
||||
below2 = inter9;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
|
||||
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
|
||||
ref_temp, inter7);
|
||||
if (col == (cols / 8 - 1))
|
||||
{
|
||||
above2 = inter9;
|
||||
}
|
||||
else
|
||||
{
|
||||
above2 = inter10;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
|
||||
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
|
||||
ref_temp, inter8);
|
||||
if (col == (cols / 8 - 1))
|
||||
{
|
||||
above1 = inter9;
|
||||
}
|
||||
else
|
||||
{
|
||||
above1 = inter11;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
|
||||
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
|
||||
ref_temp, inter9);
|
||||
VP8_TRANSPOSE8x16_UB_UB(inter2, inter3, inter4, inter5,
|
||||
inter6, inter7, inter8, inter9,
|
||||
inter2, inter3, inter4, inter5,
|
||||
inter6, inter7, inter8, inter9,
|
||||
inter10, inter11, inter12, inter13,
|
||||
inter14, inter15, above2, above1);
|
||||
|
||||
p_dst += 8;
|
||||
LD_UB2(p_dst, dst_stride, inter0, inter1);
|
||||
ST8x1_UB(inter2, p_dst_st);
|
||||
ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
|
||||
LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
|
||||
ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
|
||||
ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
|
||||
LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
|
||||
ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
|
||||
ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
|
||||
LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
|
||||
ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
|
||||
ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
|
||||
LD_UB2(p_dst + 8 * dst_stride, dst_stride, inter8, inter9);
|
||||
ST8x1_UB(inter10, (p_dst_st + 8 * dst_stride));
|
||||
ST8x1_UB(inter11, (p_dst_st + 9 * dst_stride));
|
||||
LD_UB2(p_dst + 10 * dst_stride, dst_stride, inter10, inter11);
|
||||
ST8x1_UB(inter12, (p_dst_st + 10 * dst_stride));
|
||||
ST8x1_UB(inter13, (p_dst_st + 11 * dst_stride));
|
||||
LD_UB2(p_dst + 12 * dst_stride, dst_stride, inter12, inter13);
|
||||
ST8x1_UB(inter14, (p_dst_st + 12 * dst_stride));
|
||||
ST8x1_UB(inter15, (p_dst_st + 13 * dst_stride));
|
||||
LD_UB2(p_dst + 14 * dst_stride, dst_stride, inter14, inter15);
|
||||
ST8x1_UB(above2, (p_dst_st + 14 * dst_stride));
|
||||
ST8x1_UB(above1, (p_dst_st + 15 * dst_stride));
|
||||
p_dst_st += 8;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_post_proc_down_and_across_mb_row_msa(uint8_t *src, uint8_t *dst,
|
||||
int32_t src_stride,
|
||||
int32_t dst_stride,
|
||||
int32_t cols, uint8_t *f,
|
||||
int32_t size)
|
||||
{
|
||||
if (8 == size)
|
||||
{
|
||||
postproc_down_across_chroma_msa(src, dst, src_stride, dst_stride,
|
||||
cols, f);
|
||||
}
|
||||
else if (16 == size)
|
||||
{
|
||||
postproc_down_across_luma_msa(src, dst, src_stride, dst_stride,
|
||||
cols, f);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch,
|
||||
int32_t rows, int32_t cols, int32_t flimit)
|
||||
{
|
||||
int32_t row, col, cnt;
|
||||
uint8_t *src_dup = src_ptr;
|
||||
v16u8 src0, src, tmp_orig;
|
||||
v16u8 tmp = { 0 };
|
||||
v16i8 zero = { 0 };
|
||||
v8u16 sum_h, src_r_h, src_l_h;
|
||||
v4u32 src_r_w, src_l_w;
|
||||
v4i32 flimit_vec;
|
||||
|
||||
flimit_vec = __msa_fill_w(flimit);
|
||||
for (row = rows; row--;)
|
||||
{
|
||||
int32_t sum_sq = 0;
|
||||
int32_t sum = 0;
|
||||
src0 = (v16u8)__msa_fill_b(src_dup[0]);
|
||||
ST8x1_UB(src0, (src_dup - 8));
|
||||
|
||||
src0 = (v16u8)__msa_fill_b(src_dup[cols - 1]);
|
||||
ST_UB(src0, src_dup + cols);
|
||||
src_dup[cols + 16] = src_dup[cols - 1];
|
||||
tmp_orig = (v16u8)__msa_ldi_b(0);
|
||||
tmp_orig[15] = tmp[15];
|
||||
src = LD_UB(src_dup - 8);
|
||||
src[15] = 0;
|
||||
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
|
||||
src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
|
||||
src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
|
||||
sum_sq = HADD_SW_S32(src_r_w);
|
||||
sum_sq += HADD_SW_S32(src_l_w);
|
||||
sum_h = __msa_hadd_u_h(src, src);
|
||||
sum = HADD_UH_U32(sum_h);
|
||||
{
|
||||
v16u8 src7, src8, src_r, src_l;
|
||||
v16i8 mask;
|
||||
v8u16 add_r, add_l;
|
||||
v8i16 sub_r, sub_l, sum_r, sum_l, mask0, mask1;
|
||||
v4i32 sum_sq0, sum_sq1, sum_sq2, sum_sq3;
|
||||
v4i32 sub0, sub1, sub2, sub3;
|
||||
v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
|
||||
v4i32 mul0, mul1, mul2, mul3;
|
||||
v4i32 total0, total1, total2, total3;
|
||||
v8i16 const8 = __msa_fill_h(8);
|
||||
|
||||
src7 = LD_UB(src_dup + 7);
|
||||
src8 = LD_UB(src_dup - 8);
|
||||
for (col = 0; col < (cols >> 4); ++col)
|
||||
{
|
||||
ILVRL_B2_UB(src7, src8, src_r, src_l);
|
||||
HSUB_UB2_SH(src_r, src_l, sub_r, sub_l);
|
||||
|
||||
sum_r[0] = sum + sub_r[0];
|
||||
for (cnt = 0; cnt < 7; ++cnt)
|
||||
{
|
||||
sum_r[cnt + 1] = sum_r[cnt] + sub_r[cnt + 1];
|
||||
}
|
||||
sum_l[0] = sum_r[7] + sub_l[0];
|
||||
for (cnt = 0; cnt < 7; ++cnt)
|
||||
{
|
||||
sum_l[cnt + 1] = sum_l[cnt] + sub_l[cnt + 1];
|
||||
}
|
||||
sum = sum_l[7];
|
||||
src = LD_UB(src_dup + 16 * col);
|
||||
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
|
||||
src7 = (v16u8)((const8 + sum_r + (v8i16)src_r_h) >> 4);
|
||||
src8 = (v16u8)((const8 + sum_l + (v8i16)src_l_h) >> 4);
|
||||
tmp = (v16u8)__msa_pckev_b((v16i8)src8, (v16i8)src7);
|
||||
|
||||
HADD_UB2_UH(src_r, src_l, add_r, add_l);
|
||||
UNPCK_SH_SW(sub_r, sub0, sub1);
|
||||
UNPCK_SH_SW(sub_l, sub2, sub3);
|
||||
ILVR_H2_SW(zero, add_r, zero, add_l, sum0_w, sum2_w);
|
||||
ILVL_H2_SW(zero, add_r, zero, add_l, sum1_w, sum3_w);
|
||||
MUL4(sum0_w, sub0, sum1_w, sub1, sum2_w, sub2, sum3_w, sub3,
|
||||
mul0, mul1, mul2, mul3);
|
||||
sum_sq0[0] = sum_sq + mul0[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt)
|
||||
{
|
||||
sum_sq0[cnt + 1] = sum_sq0[cnt] + mul0[cnt + 1];
|
||||
}
|
||||
sum_sq1[0] = sum_sq0[3] + mul1[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt)
|
||||
{
|
||||
sum_sq1[cnt + 1] = sum_sq1[cnt] + mul1[cnt + 1];
|
||||
}
|
||||
sum_sq2[0] = sum_sq1[3] + mul2[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt)
|
||||
{
|
||||
sum_sq2[cnt + 1] = sum_sq2[cnt] + mul2[cnt + 1];
|
||||
}
|
||||
sum_sq3[0] = sum_sq2[3] + mul3[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt)
|
||||
{
|
||||
sum_sq3[cnt + 1] = sum_sq3[cnt] + mul3[cnt + 1];
|
||||
}
|
||||
sum_sq = sum_sq3[3];
|
||||
|
||||
UNPCK_SH_SW(sum_r, sum0_w, sum1_w);
|
||||
UNPCK_SH_SW(sum_l, sum2_w, sum3_w);
|
||||
total0 = sum_sq0 * __msa_ldi_w(15);
|
||||
total0 -= sum0_w * sum0_w;
|
||||
total1 = sum_sq1 * __msa_ldi_w(15);
|
||||
total1 -= sum1_w * sum1_w;
|
||||
total2 = sum_sq2 * __msa_ldi_w(15);
|
||||
total2 -= sum2_w * sum2_w;
|
||||
total3 = sum_sq3 * __msa_ldi_w(15);
|
||||
total3 -= sum3_w * sum3_w;
|
||||
total0 = (total0 < flimit_vec);
|
||||
total1 = (total1 < flimit_vec);
|
||||
total2 = (total2 < flimit_vec);
|
||||
total3 = (total3 < flimit_vec);
|
||||
PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
|
||||
mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
|
||||
tmp = __msa_bmz_v(tmp, src, (v16u8)mask);
|
||||
|
||||
if (col == 0)
|
||||
{
|
||||
uint64_t src_d;
|
||||
|
||||
src_d = __msa_copy_u_d((v2i64)tmp_orig, 1);
|
||||
SD(src_d, (src_dup - 8));
|
||||
}
|
||||
|
||||
src7 = LD_UB(src_dup + 16 * (col + 1) + 7);
|
||||
src8 = LD_UB(src_dup + 16 * (col + 1) - 8);
|
||||
ST_UB(tmp, (src_dup + (16 * col)));
|
||||
}
|
||||
|
||||
src_dup += pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
|
||||
int32_t cols, int32_t flimit)
|
||||
{
|
||||
int32_t row, col, cnt, i;
|
||||
const int16_t *rv3 = &vp8_rv_msa[63 & rand()];
|
||||
v4i32 flimit_vec;
|
||||
v16u8 dst7, dst8, dst_r_b, dst_l_b;
|
||||
v16i8 mask;
|
||||
v8u16 add_r, add_l;
|
||||
v8i16 dst_r_h, dst_l_h, sub_r, sub_l, mask0, mask1;
|
||||
v4i32 sub0, sub1, sub2, sub3, total0, total1, total2, total3;
|
||||
|
||||
flimit_vec = __msa_fill_w(flimit);
|
||||
|
||||
for (col = 0; col < (cols >> 4); ++col)
|
||||
{
|
||||
uint8_t *dst_tmp = &dst_ptr[col << 4];
|
||||
v16u8 dst;
|
||||
v16i8 zero = { 0 };
|
||||
v16u8 tmp[16];
|
||||
v8i16 mult0, mult1, rv2_0, rv2_1;
|
||||
v8i16 sum0_h = { 0 };
|
||||
v8i16 sum1_h = { 0 };
|
||||
v4i32 mul0 = { 0 };
|
||||
v4i32 mul1 = { 0 };
|
||||
v4i32 mul2 = { 0 };
|
||||
v4i32 mul3 = { 0 };
|
||||
v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
|
||||
v4i32 add0, add1, add2, add3;
|
||||
const int16_t *rv2[16];
|
||||
|
||||
dst = LD_UB(dst_tmp);
|
||||
for (cnt = (col << 4), i = 0; i < 16; ++cnt)
|
||||
{
|
||||
rv2[i] = rv3 + ((cnt * 17) & 127);
|
||||
++i;
|
||||
}
|
||||
for (cnt = -8; cnt < 0; ++cnt)
|
||||
{
|
||||
ST_UB(dst, dst_tmp + cnt * pitch);
|
||||
}
|
||||
|
||||
dst = LD_UB((dst_tmp + (rows - 1) * pitch));
|
||||
for (cnt = rows; cnt < rows + 17; ++cnt)
|
||||
{
|
||||
ST_UB(dst, dst_tmp + cnt * pitch);
|
||||
}
|
||||
for (cnt = -8; cnt <= 6; ++cnt)
|
||||
{
|
||||
dst = LD_UB(dst_tmp + (cnt * pitch));
|
||||
UNPCK_UB_SH(dst, dst_r_h, dst_l_h);
|
||||
MUL2(dst_r_h, dst_r_h, dst_l_h, dst_l_h, mult0, mult1);
|
||||
mul0 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult0);
|
||||
mul1 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult0);
|
||||
mul2 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult1);
|
||||
mul3 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult1);
|
||||
ADD2(sum0_h, dst_r_h, sum1_h, dst_l_h, sum0_h, sum1_h);
|
||||
}
|
||||
|
||||
for (row = 0; row < (rows + 8); ++row)
|
||||
{
|
||||
for (i = 0; i < 8; ++i)
|
||||
{
|
||||
rv2_0[i] = *(rv2[i] + (row & 127));
|
||||
rv2_1[i] = *(rv2[i + 8] + (row & 127));
|
||||
}
|
||||
dst7 = LD_UB(dst_tmp + (7 * pitch));
|
||||
dst8 = LD_UB(dst_tmp - (8 * pitch));
|
||||
ILVRL_B2_UB(dst7, dst8, dst_r_b, dst_l_b);
|
||||
|
||||
HSUB_UB2_SH(dst_r_b, dst_l_b, sub_r, sub_l);
|
||||
UNPCK_SH_SW(sub_r, sub0, sub1);
|
||||
UNPCK_SH_SW(sub_l, sub2, sub3);
|
||||
sum0_h += sub_r;
|
||||
sum1_h += sub_l;
|
||||
|
||||
HADD_UB2_UH(dst_r_b, dst_l_b, add_r, add_l);
|
||||
|
||||
ILVRL_H2_SW(zero, add_r, add0, add1);
|
||||
ILVRL_H2_SW(zero, add_l, add2, add3);
|
||||
mul0 += add0 * sub0;
|
||||
mul1 += add1 * sub1;
|
||||
mul2 += add2 * sub2;
|
||||
mul3 += add3 * sub3;
|
||||
dst = LD_UB(dst_tmp);
|
||||
ILVRL_B2_SH(zero, dst, dst_r_h, dst_l_h);
|
||||
dst7 = (v16u8)((rv2_0 + sum0_h + dst_r_h) >> 4);
|
||||
dst8 = (v16u8)((rv2_1 + sum1_h + dst_l_h) >> 4);
|
||||
tmp[row & 15] = (v16u8)__msa_pckev_b((v16i8)dst8, (v16i8)dst7);
|
||||
|
||||
UNPCK_SH_SW(sum0_h, sum0_w, sum1_w);
|
||||
UNPCK_SH_SW(sum1_h, sum2_w, sum3_w);
|
||||
total0 = mul0 * __msa_ldi_w(15);
|
||||
total0 -= sum0_w * sum0_w;
|
||||
total1 = mul1 * __msa_ldi_w(15);
|
||||
total1 -= sum1_w * sum1_w;
|
||||
total2 = mul2 * __msa_ldi_w(15);
|
||||
total2 -= sum2_w * sum2_w;
|
||||
total3 = mul3 * __msa_ldi_w(15);
|
||||
total3 -= sum3_w * sum3_w;
|
||||
total0 = (total0 < flimit_vec);
|
||||
total1 = (total1 < flimit_vec);
|
||||
total2 = (total2 < flimit_vec);
|
||||
total3 = (total3 < flimit_vec);
|
||||
PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
|
||||
mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
|
||||
tmp[row & 15] = __msa_bmz_v(tmp[row & 15], dst, (v16u8)mask);
|
||||
|
||||
if (row >= 8)
|
||||
{
|
||||
ST_UB(tmp[(row - 8) & 15], (dst_tmp - 8 * pitch));
|
||||
}
|
||||
|
||||
dst_tmp += pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
|
||||
char blackclamp[16], char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
uint32_t width, uint32_t height,
|
||||
int32_t pitch)
|
||||
{
|
||||
uint32_t i, j;
|
||||
|
||||
for (i = 0; i < height / 2; ++i)
|
||||
{
|
||||
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
|
||||
int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
|
||||
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
|
||||
int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
|
||||
for (j = width / 16; j--;)
|
||||
{
|
||||
v16i8 temp00_s, temp01_s;
|
||||
v16u8 temp00, temp01, black_clamp, white_clamp;
|
||||
v16u8 pos0, ref0, pos1, ref1;
|
||||
v16i8 const127 = __msa_ldi_b(127);
|
||||
|
||||
pos0 = LD_UB(pos0_ptr);
|
||||
ref0 = LD_UB(ref0_ptr);
|
||||
pos1 = LD_UB(pos1_ptr);
|
||||
ref1 = LD_UB(ref1_ptr);
|
||||
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
|
||||
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
|
||||
temp00 = (pos0 < black_clamp);
|
||||
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
|
||||
temp01 = (pos1 < black_clamp);
|
||||
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp00 = (v16u8)(temp00_s < pos0);
|
||||
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
|
||||
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp01 = (temp01_s < pos1);
|
||||
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
pos0 += ref0;
|
||||
ST_UB(pos0, pos0_ptr);
|
||||
pos1 += ref1;
|
||||
ST_UB(pos1, pos1_ptr);
|
||||
pos0_ptr += 16;
|
||||
pos1_ptr += 16;
|
||||
ref0_ptr += 16;
|
||||
ref1_ptr += 16;
|
||||
}
|
||||
}
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,40 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropy.h"
|
||||
|
||||
const int vp8_mode_contexts[6][4] =
|
||||
{
|
||||
{
|
||||
/* 0 */
|
||||
7, 1, 1, 143,
|
||||
},
|
||||
{
|
||||
/* 1 */
|
||||
14, 18, 14, 107,
|
||||
},
|
||||
{
|
||||
/* 2 */
|
||||
135, 64, 57, 68,
|
||||
},
|
||||
{
|
||||
/* 3 */
|
||||
60, 56, 128, 65,
|
||||
},
|
||||
{
|
||||
/* 4 */
|
||||
159, 134, 128, 34,
|
||||
},
|
||||
{
|
||||
/* 5 */
|
||||
234, 188, 128, 28,
|
||||
},
|
||||
};
|
|
@ -1,25 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_MODECONT_H_
|
||||
#define VP8_COMMON_MODECONT_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern const int vp8_mode_contexts[6][4];
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_MODECONT_H_
|
|
@ -1,36 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_MV_H_
|
||||
#define VP8_COMMON_MV_H_
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
short row;
|
||||
short col;
|
||||
} MV;
|
||||
|
||||
typedef union int_mv
|
||||
{
|
||||
uint32_t as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates faster equality tests and copies */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_MV_H_
|
|
@ -1,282 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYX_H_
|
||||
#define VP8_COMMON_ONYX_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx/vpx_encoder.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "ppflags.h"
|
||||
|
||||
struct VP8_COMP;
|
||||
|
||||
/* Create/destroy static data structures. */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NORMAL = 0,
|
||||
FOURFIVE = 1,
|
||||
THREEFIVE = 2,
|
||||
ONETWO = 3
|
||||
|
||||
} VPX_SCALING;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x0,
|
||||
USAGE_STREAM_FROM_SERVER = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2,
|
||||
USAGE_CONSTANT_QUALITY = 0x3
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
MODE_REALTIME = 0x0,
|
||||
MODE_GOODQUALITY = 0x1,
|
||||
MODE_BESTQUALITY = 0x2,
|
||||
MODE_FIRSTPASS = 0x3,
|
||||
MODE_SECONDPASS = 0x4,
|
||||
MODE_SECONDPASS_BEST = 0x5
|
||||
} MODE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
FRAMEFLAGS_KEY = 1,
|
||||
FRAMEFLAGS_GOLDEN = 2,
|
||||
FRAMEFLAGS_ALTREF = 4
|
||||
} FRAMETYPE_FLAGS;
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
static INLINE void Scale2Ratio(int mode, int *hr, int *hs)
|
||||
{
|
||||
switch (mode)
|
||||
{
|
||||
case NORMAL:
|
||||
*hr = 1;
|
||||
*hs = 1;
|
||||
break;
|
||||
case FOURFIVE:
|
||||
*hr = 4;
|
||||
*hs = 5;
|
||||
break;
|
||||
case THREEFIVE:
|
||||
*hr = 3;
|
||||
*hs = 5;
|
||||
break;
|
||||
case ONETWO:
|
||||
*hr = 1;
|
||||
*hs = 2;
|
||||
break;
|
||||
default:
|
||||
*hr = 1;
|
||||
*hs = 1;
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* 4 versions of bitstream defined:
|
||||
* 0 best quality/slowest decode, 3 lowest quality/fastest decode
|
||||
*/
|
||||
int Version;
|
||||
int Width;
|
||||
int Height;
|
||||
struct vpx_rational timebase;
|
||||
unsigned int target_bandwidth; /* kilobits per second */
|
||||
|
||||
/* Parameter used for applying denoiser.
|
||||
* For temporal denoiser: noise_sensitivity = 0 means off,
|
||||
* noise_sensitivity = 1 means temporal denoiser on for Y channel only,
|
||||
* noise_sensitivity = 2 means temporal denoiser on for all channels.
|
||||
* noise_sensitivity = 3 means aggressive denoising mode.
|
||||
* noise_sensitivity >= 4 means adaptive denoising mode.
|
||||
* Temporal denoiser is enabled via the configuration option:
|
||||
* CONFIG_TEMPORAL_DENOISING.
|
||||
* For spatial denoiser: noise_sensitivity controls the amount of
|
||||
* pre-processing blur: noise_sensitivity = 0 means off.
|
||||
* Spatial denoiser invoked under !CONFIG_TEMPORAL_DENOISING.
|
||||
*/
|
||||
int noise_sensitivity;
|
||||
|
||||
/* parameter used for sharpening output: recommendation 0: */
|
||||
int Sharpness;
|
||||
int cpu_used;
|
||||
unsigned int rc_max_intra_bitrate_pct;
|
||||
unsigned int screen_content_mode;
|
||||
|
||||
/* mode ->
|
||||
*(0)=Realtime/Live Encoding. This mode is optimized for realtim
|
||||
* encoding (for example, capturing a television signal or feed
|
||||
* from a live camera). ( speed setting controls how fast )
|
||||
*(1)=Good Quality Fast Encoding. The encoder balances quality with
|
||||
* the amount of time it takes to encode the output. ( speed
|
||||
* setting controls how fast )
|
||||
*(2)=One Pass - Best Quality. The encoder places priority on the
|
||||
* quality of the output over encoding speed. The output is
|
||||
* compressed at the highest possible quality. This option takes
|
||||
* the longest amount of time to encode. ( speed setting ignored
|
||||
* )
|
||||
*(3)=Two Pass - First Pass. The encoder generates a file of
|
||||
* statistics for use in the second encoding pass. ( speed
|
||||
* setting controls how fast )
|
||||
*(4)=Two Pass - Second Pass. The encoder uses the statistics that
|
||||
* were generated in the first encoding pass to create the
|
||||
* compressed output. ( speed setting controls how fast )
|
||||
*(5)=Two Pass - Second Pass Best. The encoder uses the statistics
|
||||
* that were generated in the first encoding pass to create the
|
||||
* compressed output using the highest possible quality, and
|
||||
* taking a longer amount of time to encode.. ( speed setting
|
||||
* ignored )
|
||||
*/
|
||||
int Mode;
|
||||
|
||||
/* Key Framing Operations */
|
||||
int auto_key; /* automatically detect cut scenes */
|
||||
int key_freq; /* maximum distance to key frame. */
|
||||
|
||||
/* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */
|
||||
int allow_lag;
|
||||
int lag_in_frames; /* how many frames lag before we start encoding */
|
||||
|
||||
/*
|
||||
* DATARATE CONTROL OPTIONS
|
||||
*/
|
||||
|
||||
int end_usage; /* vbr or cbr */
|
||||
|
||||
/* buffer targeting aggressiveness */
|
||||
int under_shoot_pct;
|
||||
int over_shoot_pct;
|
||||
|
||||
/* buffering parameters */
|
||||
int64_t starting_buffer_level;
|
||||
int64_t optimal_buffer_level;
|
||||
int64_t maximum_buffer_size;
|
||||
|
||||
int64_t starting_buffer_level_in_ms;
|
||||
int64_t optimal_buffer_level_in_ms;
|
||||
int64_t maximum_buffer_size_in_ms;
|
||||
|
||||
/* controlling quality */
|
||||
int fixed_q;
|
||||
int worst_allowed_q;
|
||||
int best_allowed_q;
|
||||
int cq_level;
|
||||
|
||||
/* allow internal resizing */
|
||||
int allow_spatial_resampling;
|
||||
int resample_down_water_mark;
|
||||
int resample_up_water_mark;
|
||||
|
||||
/* allow internal frame rate alterations */
|
||||
int allow_df;
|
||||
int drop_frames_water_mark;
|
||||
|
||||
/* two pass datarate control */
|
||||
int two_pass_vbrbias;
|
||||
int two_pass_vbrmin_section;
|
||||
int two_pass_vbrmax_section;
|
||||
|
||||
/*
|
||||
* END DATARATE CONTROL OPTIONS
|
||||
*/
|
||||
|
||||
/* these parameters aren't to be used in final build don't use!!! */
|
||||
int play_alternate;
|
||||
int alt_freq;
|
||||
int alt_q;
|
||||
int key_q;
|
||||
int gold_q;
|
||||
|
||||
|
||||
int multi_threaded; /* how many threads to run the encoder on */
|
||||
int token_partitions; /* how many token partitions to create */
|
||||
|
||||
/* early breakout threshold: for video conf recommend 800 */
|
||||
int encode_breakout;
|
||||
|
||||
/* Bitfield defining the error resiliency features to enable.
|
||||
* Can provide decodable frames after losses in previous
|
||||
* frames and decodable partitions after losses in the same frame.
|
||||
*/
|
||||
unsigned int error_resilient_mode;
|
||||
|
||||
int arnr_max_frames;
|
||||
int arnr_strength;
|
||||
int arnr_type;
|
||||
|
||||
vpx_fixed_buf_t two_pass_stats_in;
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
|
||||
vp8e_tuning tuning;
|
||||
|
||||
/* Temporal scaling parameters */
|
||||
unsigned int number_of_layers;
|
||||
unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY];
|
||||
unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY];
|
||||
unsigned int periodicity;
|
||||
unsigned int layer_id[VPX_TS_MAX_PERIODICITY];
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* Number of total resolutions encoded */
|
||||
unsigned int mr_total_resolutions;
|
||||
|
||||
/* Current encoder ID */
|
||||
unsigned int mr_encoder_id;
|
||||
|
||||
/* Down-sampling factor */
|
||||
vpx_rational_t mr_down_sampling_factor;
|
||||
|
||||
/* Memory location to store low-resolution encoder's mode info */
|
||||
void* mr_low_res_mode_info;
|
||||
#endif
|
||||
} VP8_CONFIG;
|
||||
|
||||
|
||||
void vp8_initialize();
|
||||
|
||||
struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf);
|
||||
void vp8_remove_compressor(struct VP8_COMP* *comp);
|
||||
|
||||
void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
|
||||
void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
|
||||
|
||||
int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
|
||||
int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags);
|
||||
int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags);
|
||||
int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8_update_entropy(struct VP8_COMP* comp, int update);
|
||||
int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
|
||||
int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols);
|
||||
int vp8_set_internal_size(struct VP8_COMP* comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
|
||||
int vp8_get_quantizer(struct VP8_COMP* c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ONYX_H_
|
|
@ -1,185 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYXC_INT_H_
|
||||
#define VP8_COMMON_ONYXC_INT_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "loopfilter.h"
|
||||
#include "entropymv.h"
|
||||
#include "entropy.h"
|
||||
#if CONFIG_POSTPROC
|
||||
#include "postproc.h"
|
||||
#endif
|
||||
|
||||
/*#ifdef PACKET_TESTING*/
|
||||
#include "header.h"
|
||||
/*#endif*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MINQ 0
|
||||
#define MAXQ 127
|
||||
#define QINDEX_RANGE (MAXQ + 1)
|
||||
|
||||
#define NUM_YV12_BUFFERS 4
|
||||
|
||||
#define MAX_PARTITIONS 9
|
||||
|
||||
typedef struct frame_contexts
|
||||
{
|
||||
vp8_prob bmode_prob [VP8_BINTRAMODES-1];
|
||||
vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */
|
||||
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
|
||||
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
|
||||
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
MV_CONTEXT mvc[2];
|
||||
} FRAME_CONTEXT;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
ONE_PARTITION = 0,
|
||||
TWO_PARTITION = 1,
|
||||
FOUR_PARTITION = 2,
|
||||
EIGHT_PARTITION = 3
|
||||
} TOKEN_PARTITION;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
RECON_CLAMP_REQUIRED = 0,
|
||||
RECON_CLAMP_NOTREQUIRED = 1
|
||||
} CLAMP_TYPE;
|
||||
|
||||
typedef struct VP8Common
|
||||
|
||||
{
|
||||
struct vpx_internal_error_info error;
|
||||
|
||||
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
|
||||
|
||||
int Width;
|
||||
int Height;
|
||||
int horiz_scale;
|
||||
int vert_scale;
|
||||
|
||||
CLAMP_TYPE clamp_type;
|
||||
|
||||
YV12_BUFFER_CONFIG *frame_to_show;
|
||||
|
||||
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
|
||||
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
|
||||
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
|
||||
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG post_proc_buffer_int;
|
||||
int post_proc_buffer_int_used;
|
||||
unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
|
||||
#endif
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
|
||||
int frame_flags;
|
||||
int MBs;
|
||||
int mb_rows;
|
||||
int mb_cols;
|
||||
int mode_info_stride;
|
||||
|
||||
/* profile settings */
|
||||
int mb_no_coeff_skip;
|
||||
int no_lpf;
|
||||
int use_bilinear_mc_filter;
|
||||
int full_pixel;
|
||||
|
||||
int base_qindex;
|
||||
|
||||
int y1dc_delta_q;
|
||||
int y2dc_delta_q;
|
||||
int y2ac_delta_q;
|
||||
int uvdc_delta_q;
|
||||
int uvac_delta_q;
|
||||
|
||||
/* We allocate a MODE_INFO struct for each macroblock, together with
|
||||
an extra row on top and column on the left to simplify prediction. */
|
||||
|
||||
MODE_INFO *mip; /* Base of allocated array */
|
||||
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
|
||||
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
|
||||
#endif
|
||||
MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame
|
||||
to show */
|
||||
LOOPFILTERTYPE filter_type;
|
||||
|
||||
loop_filter_info_n lf_info;
|
||||
|
||||
int filter_level;
|
||||
int last_sharpness_level;
|
||||
int sharpness_level;
|
||||
|
||||
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
|
||||
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
|
||||
|
||||
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
|
||||
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
|
||||
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
|
||||
|
||||
FRAME_CONTEXT lfc; /* last frame entropy */
|
||||
FRAME_CONTEXT fc; /* this frame entropy */
|
||||
|
||||
unsigned int current_video_frame;
|
||||
|
||||
int version;
|
||||
|
||||
TOKEN_PARTITION multi_token_partition;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
VP8_HEADER oh;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
double bitrate;
|
||||
double framerate;
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
int processor_core_count;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC
|
||||
struct postproc_state postproc_state;
|
||||
#endif
|
||||
int cpu_caps;
|
||||
} VP8_COMMON;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ONYXC_INT_H_
|
|
@ -1,63 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYXD_H_
|
||||
#define VP8_COMMON_ONYXD_H_
|
||||
|
||||
|
||||
/* Create/destroy static data structures. */
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "ppflags.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vp8.h"
|
||||
|
||||
struct VP8D_COMP;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int Width;
|
||||
int Height;
|
||||
int Version;
|
||||
int postprocess;
|
||||
int max_threads;
|
||||
int error_concealment;
|
||||
} VP8D_CONFIG;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
VP8D_OK = 0
|
||||
} VP8D_SETTING;
|
||||
|
||||
void vp8dx_initialize(void);
|
||||
|
||||
void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
|
||||
|
||||
int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
|
||||
|
||||
int vp8dx_receive_compressed_data(struct VP8D_COMP* comp,
|
||||
size_t size, const uint8_t *dest,
|
||||
int64_t time_stamp);
|
||||
int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
|
||||
|
||||
vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif // VP8_COMMON_ONYXD_H_
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_POSTPROC_H_
|
||||
#define VP8_COMMON_POSTPROC_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
struct postproc_state
|
||||
{
|
||||
int last_q;
|
||||
int last_noise;
|
||||
char noise[3072];
|
||||
int last_base_qindex;
|
||||
int last_frame_valid;
|
||||
DECLARE_ALIGNED(16, char, blackclamp[16]);
|
||||
DECLARE_ALIGNED(16, char, whiteclamp[16]);
|
||||
DECLARE_ALIGNED(16, char, bothclamp[16]);
|
||||
};
|
||||
#include "onyxc_int.h"
|
||||
#include "ppflags.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
|
||||
vp8_ppflags_t *flags);
|
||||
|
||||
|
||||
void vp8_de_noise(struct VP8Common *oci,
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *post,
|
||||
int q,
|
||||
int low_var_thresh,
|
||||
int flag,
|
||||
int uvfilter);
|
||||
|
||||
void vp8_deblock(struct VP8Common *oci,
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *post,
|
||||
int q,
|
||||
int low_var_thresh,
|
||||
int flag);
|
||||
|
||||
#define MFQE_PRECISION 4
|
||||
|
||||
void vp8_multiframe_quality_enhance(struct VP8Common *cm);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_POSTPROC_H_
|
|
@ -1,49 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_PPFLAGS_H_
|
||||
#define VP8_COMMON_PPFLAGS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
enum
|
||||
{
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
|
||||
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
|
||||
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
|
||||
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
|
||||
VP8D_DEBUG_DRAW_MV = 1<<7,
|
||||
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
|
||||
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
|
||||
VP8D_MFQE = 1<<10
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int post_proc_flag;
|
||||
int deblocking_level;
|
||||
int noise_level;
|
||||
int display_ref_frame_flag;
|
||||
int display_mb_modes_flag;
|
||||
int display_b_modes_flag;
|
||||
int display_mv_flag;
|
||||
} vp8_ppflags_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_PPFLAGS_H_
|
|
@ -1,135 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "quant_common.h"
|
||||
|
||||
static const int dc_qlookup[QINDEX_RANGE] =
|
||||
{
|
||||
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
|
||||
18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
|
||||
29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
|
||||
44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
|
||||
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
||||
75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
|
||||
91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
|
||||
122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
|
||||
};
|
||||
|
||||
static const int ac_qlookup[QINDEX_RANGE] =
|
||||
{
|
||||
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
|
||||
78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
|
||||
110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
|
||||
155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
|
||||
213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
|
||||
};
|
||||
|
||||
|
||||
int vp8_dc_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ];
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_dc2quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ] * 2;
|
||||
return retval;
|
||||
|
||||
}
|
||||
int vp8_dc_uv_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ];
|
||||
|
||||
if (retval > 132)
|
||||
retval = 132;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_ac_yquant(int QIndex)
|
||||
{
|
||||
int retval;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = ac_qlookup[ QIndex ];
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_ac2quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
|
||||
* The smallest precision for that is '(x*6349) >> 12' but 16 is a good
|
||||
* word size. */
|
||||
retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
|
||||
|
||||
if (retval < 8)
|
||||
retval = 8;
|
||||
|
||||
return retval;
|
||||
}
|
||||
int vp8_ac_uv_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = ac_qlookup[ QIndex ];
|
||||
return retval;
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_QUANT_COMMON_H_
|
||||
#define VP8_COMMON_QUANT_COMMON_H_
|
||||
|
||||
|
||||
#include "string.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern int vp8_ac_yquant(int QIndex);
|
||||
extern int vp8_dc_quant(int QIndex, int Delta);
|
||||
extern int vp8_dc2quant(int QIndex, int Delta);
|
||||
extern int vp8_ac2quant(int QIndex, int Delta);
|
||||
extern int vp8_dc_uv_quant(int QIndex, int Delta);
|
||||
extern int vp8_ac_uv_quant(int QIndex, int Delta);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_QUANT_COMMON_H_
|
|
@ -1,544 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "blockd.h"
|
||||
#include "reconinter.h"
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#include "onyxc_int.h"
|
||||
#endif
|
||||
|
||||
void vp8_copy_mem16x16_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
memcpy(dst, src, 16);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x4_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
|
||||
{
|
||||
int r;
|
||||
unsigned char *pred_ptr = d->predictor;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
pred_ptr[0] = ptr[0];
|
||||
pred_ptr[1] = ptr[1];
|
||||
pred_ptr[2] = ptr[2];
|
||||
pred_ptr[3] = ptr[3];
|
||||
pred_ptr += pitch;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
|
||||
{
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
|
||||
{
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
|
||||
{
|
||||
int r;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
dst[2] = ptr[2];
|
||||
dst[3] = ptr[3];
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
|
||||
{
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
unsigned char *vpred_ptr = &x->predictor[320];
|
||||
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int offset;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
|
||||
/* calc uv motion vectors */
|
||||
mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_row /= 2;
|
||||
mv_col /= 2;
|
||||
mv_row &= x->fullpixel_mask;
|
||||
mv_col &= x->fullpixel_mask;
|
||||
|
||||
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
|
||||
}
|
||||
}
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
|
||||
{
|
||||
int i, j;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
unsigned char *base_pre;
|
||||
|
||||
/* build uv mvs */
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
|
||||
int temp;
|
||||
|
||||
temp = x->block[yoffset ].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+1].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
temp = x->block[yoffset ].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+1].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
}
|
||||
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
int dst_ystride)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
|
||||
dst_y, dst_ystride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y,
|
||||
dst_ystride);
|
||||
}
|
||||
}
|
||||
|
||||
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
/* If the MV points so far into the UMV border that no visible pixels
|
||||
* are used for reconstruction, the subpel part of the MV can be
|
||||
* discarded and the MV limited to 16 pixels with equivalent results.
|
||||
*
|
||||
* This limit kicks in at 19 pixels for the top and left edges, for
|
||||
* the 16 pixels plus 3 taps right of the central pixel when subpel
|
||||
* filtering. The bottom and right edges use 16 pixels plus 2 pixels
|
||||
* left of the central pixel when filtering.
|
||||
*/
|
||||
if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
|
||||
mv->col = xd->mb_to_left_edge - (16 << 3);
|
||||
else if (mv->col > xd->mb_to_right_edge + (18 << 3))
|
||||
mv->col = xd->mb_to_right_edge + (16 << 3);
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
|
||||
mv->row = xd->mb_to_top_edge - (16 << 3);
|
||||
else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
|
||||
mv->row = xd->mb_to_bottom_edge + (16 << 3);
|
||||
}
|
||||
|
||||
/* A version of the above function for chroma block MVs.*/
|
||||
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
|
||||
(xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
|
||||
mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ?
|
||||
(xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
|
||||
|
||||
mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
|
||||
(xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
|
||||
mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
|
||||
(xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
|
||||
}
|
||||
|
||||
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride)
|
||||
{
|
||||
int offset;
|
||||
unsigned char *ptr;
|
||||
unsigned char *uptr, *vptr;
|
||||
|
||||
int_mv _16x16mv;
|
||||
|
||||
unsigned char *ptr_base = x->pre.y_buffer;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
|
||||
_16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
|
||||
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
|
||||
}
|
||||
|
||||
ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
|
||||
|
||||
if ( _16x16mv.as_int & 0x00070007)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
|
||||
}
|
||||
|
||||
/* calc uv motion vectors */
|
||||
_16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.row /= 2;
|
||||
_16x16mv.as_mv.col /= 2;
|
||||
_16x16mv.as_mv.row &= x->fullpixel_mask;
|
||||
_16x16mv.as_mv.col &= x->fullpixel_mask;
|
||||
|
||||
pre_stride >>= 1;
|
||||
offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ( _16x16mv.as_int & 0x00070007)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
|
||||
{
|
||||
int i;
|
||||
unsigned char *base_dst = x->dst.y_buffer;
|
||||
unsigned char *base_pre = x->pre.y_buffer;
|
||||
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
BLOCKD *b;
|
||||
int dst_stride = x->dst.y_stride;
|
||||
|
||||
x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
|
||||
x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
|
||||
x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
|
||||
x->block[10].bmi = x->mode_info_context->bmi[10];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
b = &x->block[ 0];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[ 2];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[ 8];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[10];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 16; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.y_stride;
|
||||
|
||||
x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
|
||||
x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
base_dst = x->dst.u_buffer;
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
|
||||
base_dst = x->dst.v_buffer;
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void build_4x4uvmvs(MACROBLOCKD *x)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
|
||||
int temp;
|
||||
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
|
||||
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
|
||||
{
|
||||
if (xd->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
|
||||
xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.y_stride, xd->dst.uv_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
build_4x4uvmvs(xd);
|
||||
build_inter4x4_predictors_mb(xd);
|
||||
}
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTER_H_
|
||||
#define VP8_COMMON_RECONINTER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride);
|
||||
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
int dst_ystride);
|
||||
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
|
||||
unsigned char *base_pre,
|
||||
int pre_stride,
|
||||
vp8_subpix_fn_t sppf);
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_RECONINTER_H_
|
|
@ -1,117 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
#include "blockd.h"
|
||||
#include "vp8/common/reconintra.h"
|
||||
#include "vp8/common/reconintra4x4.h"
|
||||
|
||||
enum {
|
||||
SIZE_16,
|
||||
SIZE_8,
|
||||
NUM_SIZES,
|
||||
};
|
||||
|
||||
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
const uint8_t *above, const uint8_t *left);
|
||||
|
||||
static intra_pred_fn pred[4][NUM_SIZES];
|
||||
static intra_pred_fn dc_pred[2][2][NUM_SIZES];
|
||||
|
||||
static void vp8_init_intra_predictors_internal(void)
|
||||
{
|
||||
#define INIT_SIZE(sz) \
|
||||
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
|
||||
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
|
||||
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
|
||||
\
|
||||
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
|
||||
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
|
||||
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
|
||||
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
|
||||
|
||||
INIT_SIZE(16);
|
||||
INIT_SIZE(8);
|
||||
vp8_init_intra4x4_predictors_internal();
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
unsigned char * yabove_row,
|
||||
unsigned char * yleft,
|
||||
int left_stride,
|
||||
unsigned char * ypred_ptr,
|
||||
int y_stride)
|
||||
{
|
||||
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
|
||||
DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
yleft_col[i] = yleft[i* left_stride];
|
||||
}
|
||||
|
||||
if (mode == DC_PRED)
|
||||
{
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
|
||||
}
|
||||
else
|
||||
{
|
||||
fn = pred[mode][SIZE_16];
|
||||
}
|
||||
|
||||
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride)
|
||||
{
|
||||
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
|
||||
unsigned char uleft_col[8];
|
||||
unsigned char vleft_col[8];
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
uleft_col[i] = uleft[i * left_stride];
|
||||
vleft_col[i] = vleft[i * left_stride];
|
||||
}
|
||||
|
||||
if (uvmode == DC_PRED)
|
||||
{
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
|
||||
}
|
||||
else
|
||||
{
|
||||
fn = pred[uvmode][SIZE_8];
|
||||
}
|
||||
|
||||
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
|
||||
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
|
||||
}
|
||||
|
||||
void vp8_init_intra_predictors(void)
|
||||
{
|
||||
once(vp8_init_intra_predictors_internal);
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTRA_H_
|
||||
#define VP8_COMMON_RECONINTRA_H_
|
||||
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
unsigned char *yabove_row,
|
||||
unsigned char *yleft,
|
||||
int left_stride,
|
||||
unsigned char *ypred_ptr,
|
||||
int y_stride);
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride);
|
||||
|
||||
void vp8_init_intra_predictors(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_RECONINTRA_H_
|
|
@ -1,54 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "blockd.h"
|
||||
|
||||
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
const uint8_t *above, const uint8_t *left);
|
||||
|
||||
static intra_pred_fn pred[10];
|
||||
|
||||
void vp8_init_intra4x4_predictors_internal(void)
|
||||
{
|
||||
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
|
||||
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
|
||||
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
|
||||
pred[B_HE_PRED] = vpx_he_predictor_4x4;
|
||||
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
|
||||
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
|
||||
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
|
||||
pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
|
||||
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
|
||||
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
|
||||
}
|
||||
|
||||
void vp8_intra4x4_predict(unsigned char *above,
|
||||
unsigned char *yleft, int left_stride,
|
||||
B_PREDICTION_MODE b_mode,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char top_left)
|
||||
{
|
||||
unsigned char Left[4];
|
||||
unsigned char Aboveb[12], *Above = Aboveb + 4;
|
||||
|
||||
Left[0] = yleft[0];
|
||||
Left[1] = yleft[left_stride];
|
||||
Left[2] = yleft[2 * left_stride];
|
||||
Left[3] = yleft[3 * left_stride];
|
||||
memcpy(Above, above, 8);
|
||||
Above[-1] = top_left;
|
||||
|
||||
pred[b_mode](dst, dst_stride, Above, Left);
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTRA4X4_H_
|
||||
#define VP8_COMMON_RECONINTRA4X4_H_
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static void intra_prediction_down_copy(MACROBLOCKD *xd,
|
||||
unsigned char *above_right_src)
|
||||
{
|
||||
int dst_stride = xd->dst.y_stride;
|
||||
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
|
||||
|
||||
unsigned int *src_ptr = (unsigned int *)above_right_src;
|
||||
unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride);
|
||||
unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride);
|
||||
unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride);
|
||||
|
||||
*dst_ptr0 = *src_ptr;
|
||||
*dst_ptr1 = *src_ptr;
|
||||
*dst_ptr2 = *src_ptr;
|
||||
}
|
||||
|
||||
void vp8_intra4x4_predict(unsigned char *Above,
|
||||
unsigned char *yleft, int left_stride,
|
||||
B_PREDICTION_MODE b_mode,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char top_left);
|
||||
|
||||
void vp8_init_intra4x4_predictors_internal(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_RECONINTRA4X4_H_
|
|
@ -1,19 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "./vpx_config.h"
|
||||
#define RTCD_C
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
|
||||
|
||||
void vp8_rtcd()
|
||||
{
|
||||
once(setup_rtcd_internal);
|
||||
}
|
|
@ -1,321 +0,0 @@
|
|||
sub vp8_common_forward_decls() {
|
||||
print <<EOF
|
||||
/*
|
||||
* VP8
|
||||
*/
|
||||
|
||||
struct blockd;
|
||||
struct macroblockd;
|
||||
struct loop_filter_info;
|
||||
|
||||
/* Encoder forward decls */
|
||||
struct block;
|
||||
struct macroblock;
|
||||
struct variance_vtable;
|
||||
union int_mv;
|
||||
struct yv12_buffer_config;
|
||||
EOF
|
||||
}
|
||||
forward_decls qw/vp8_common_forward_decls/;
|
||||
|
||||
#
|
||||
# system state
|
||||
#
|
||||
add_proto qw/void vp8_clear_system_state/, "";
|
||||
specialize qw/vp8_clear_system_state mmx/;
|
||||
$vp8_clear_system_state_mmx=vpx_reset_mmx_state;
|
||||
|
||||
#
|
||||
# Dequant
|
||||
#
|
||||
add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
|
||||
specialize qw/vp8_dequantize_b mmx media neon msa/;
|
||||
$vp8_dequantize_b_media=vp8_dequantize_b_v6;
|
||||
|
||||
add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
|
||||
specialize qw/vp8_dequant_idct_add mmx media neon dspr2 msa/;
|
||||
$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
|
||||
$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
|
||||
|
||||
add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
|
||||
specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
|
||||
$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
|
||||
|
||||
add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
|
||||
specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
|
||||
$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
|
||||
|
||||
#
|
||||
# Loopfilter
|
||||
#
|
||||
add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
|
||||
specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
|
||||
$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
|
||||
|
||||
add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
|
||||
specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
|
||||
$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
|
||||
|
||||
add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
|
||||
specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
|
||||
$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
|
||||
|
||||
add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
|
||||
specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
|
||||
$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
|
||||
|
||||
|
||||
add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
|
||||
specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon msa/;
|
||||
$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
|
||||
$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
|
||||
$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
|
||||
$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
|
||||
$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
|
||||
$vp8_loop_filter_simple_mbv_msa=vp8_loop_filter_simple_vertical_edge_msa;
|
||||
|
||||
add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
|
||||
specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon msa/;
|
||||
$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
|
||||
$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
|
||||
$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
|
||||
$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
|
||||
$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
|
||||
$vp8_loop_filter_simple_mbh_msa=vp8_loop_filter_simple_horizontal_edge_msa;
|
||||
|
||||
add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
|
||||
specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon msa/;
|
||||
$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
|
||||
$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
|
||||
$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
|
||||
$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
|
||||
$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
|
||||
$vp8_loop_filter_simple_bv_msa=vp8_loop_filter_bvs_msa;
|
||||
|
||||
add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
|
||||
specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon msa/;
|
||||
$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
|
||||
$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
|
||||
$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
|
||||
$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
|
||||
$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
|
||||
$vp8_loop_filter_simple_bh_msa=vp8_loop_filter_bhs_msa;
|
||||
|
||||
#
|
||||
# IDCT
|
||||
#
|
||||
#idct16
|
||||
add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
|
||||
specialize qw/vp8_short_idct4x4llm mmx media neon dspr2 msa/;
|
||||
$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual;
|
||||
$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
|
||||
|
||||
#iwalsh1
|
||||
add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
|
||||
specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
|
||||
$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
|
||||
# no asm yet
|
||||
|
||||
#iwalsh16
|
||||
add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
|
||||
specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6;
|
||||
$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
|
||||
|
||||
#idct1_scalar_add
|
||||
add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
|
||||
specialize qw/vp8_dc_only_idct_add mmx media neon dspr2 msa/;
|
||||
$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6;
|
||||
$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
|
||||
|
||||
#
|
||||
# RECON
|
||||
#
|
||||
add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2 msa/;
|
||||
$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6;
|
||||
$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
|
||||
|
||||
add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_copy_mem8x8 mmx media neon dspr2 msa/;
|
||||
$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6;
|
||||
$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
|
||||
|
||||
add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/;
|
||||
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
|
||||
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
|
||||
|
||||
#
|
||||
# Postproc
|
||||
#
|
||||
if (vpx_config("CONFIG_POSTPROC") eq "yes") {
|
||||
add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
|
||||
specialize qw/vp8_mbpost_proc_down mmx sse2 msa/;
|
||||
$vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm;
|
||||
|
||||
add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
|
||||
specialize qw/vp8_mbpost_proc_across_ip sse2 msa/;
|
||||
$vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm;
|
||||
|
||||
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
|
||||
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
|
||||
|
||||
add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
|
||||
specialize qw/vp8_plane_add_noise mmx sse2 msa/;
|
||||
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
|
||||
|
||||
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
|
||||
# no asm yet
|
||||
|
||||
add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
|
||||
# no asm yet
|
||||
|
||||
add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
|
||||
# no asm yet
|
||||
|
||||
add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
|
||||
specialize qw/vp8_filter_by_weight16x16 sse2 msa/;
|
||||
|
||||
add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
|
||||
specialize qw/vp8_filter_by_weight8x8 sse2 msa/;
|
||||
|
||||
add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
|
||||
# no asm yet
|
||||
}
|
||||
|
||||
#
|
||||
# Subpixel
|
||||
#
|
||||
add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2 msa/;
|
||||
$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6;
|
||||
$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
|
||||
|
||||
add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2 msa/;
|
||||
$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6;
|
||||
$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
|
||||
|
||||
add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2 msa/;
|
||||
$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
|
||||
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
|
||||
|
||||
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
|
||||
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2 msa/;
|
||||
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
|
||||
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
|
||||
|
||||
add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon msa/;
|
||||
$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6;
|
||||
|
||||
add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon msa/;
|
||||
$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6;
|
||||
|
||||
add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
specialize qw/vp8_bilinear_predict8x4 mmx media neon msa/;
|
||||
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
|
||||
|
||||
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
|
||||
specialize qw/vp8_bilinear_predict4x4 mmx media msa/;
|
||||
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
|
||||
|
||||
#
|
||||
# Encoder functions below this point.
|
||||
#
|
||||
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
|
||||
|
||||
#
|
||||
# Block copy
|
||||
#
|
||||
if ($opts{arch} =~ /x86/) {
|
||||
add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
|
||||
specialize qw/vp8_copy32xn sse2 sse3/;
|
||||
}
|
||||
|
||||
#
|
||||
# Forward DCT
|
||||
#
|
||||
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
|
||||
specialize qw/vp8_short_fdct4x4 mmx sse2 media neon msa/;
|
||||
$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
|
||||
|
||||
add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
|
||||
specialize qw/vp8_short_fdct8x4 mmx sse2 media neon msa/;
|
||||
$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
|
||||
|
||||
add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
|
||||
specialize qw/vp8_short_walsh4x4 sse2 media neon msa/;
|
||||
$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
|
||||
|
||||
#
|
||||
# Quantizer
|
||||
#
|
||||
add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
|
||||
specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;
|
||||
|
||||
add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
|
||||
specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;
|
||||
|
||||
#
|
||||
# Block subtraction
|
||||
#
|
||||
add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
|
||||
specialize qw/vp8_block_error mmx sse2 msa/;
|
||||
$vp8_block_error_sse2=vp8_block_error_xmm;
|
||||
|
||||
add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
|
||||
specialize qw/vp8_mbblock_error mmx sse2 msa/;
|
||||
$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm;
|
||||
|
||||
add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
|
||||
specialize qw/vp8_mbuverror mmx sse2 msa/;
|
||||
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
|
||||
|
||||
#
|
||||
# Motion search
|
||||
#
|
||||
add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
|
||||
specialize qw/vp8_full_search_sad sse3 sse4_1/;
|
||||
$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
|
||||
$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;
|
||||
|
||||
add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
|
||||
specialize qw/vp8_refining_search_sad sse3/;
|
||||
$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;
|
||||
|
||||
add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
|
||||
$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4;
|
||||
|
||||
#
|
||||
# Alt-ref Noise Reduction (ARNR)
|
||||
#
|
||||
if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
|
||||
add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
|
||||
specialize qw/vp8_temporal_filter_apply sse2 msa/;
|
||||
}
|
||||
|
||||
#
|
||||
# Denoiser filter
|
||||
#
|
||||
if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
|
||||
add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
|
||||
specialize qw/vp8_denoiser_filter sse2 neon msa/;
|
||||
add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
|
||||
specialize qw/vp8_denoiser_filter_uv sse2 neon msa/;
|
||||
}
|
||||
|
||||
# End of encoder only functions
|
||||
}
|
||||
1;
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче