proposed-aom : remove vp8, vp9 from directory configure and vpx

Change-Id: Ibb9192913f235ace2f6b8ef3c0eeabc3fd4cab11
Jim Bankoski 2016-01-19 14:12:00 -08:00
Parent cb785ade5b
Commit f0d007abdc
412 changed files: 22 additions and 154724 deletions

configure (vendored)
View File

@@ -38,8 +38,6 @@ Advanced options:
${toggle_better_hw_compatibility}
enable encoder to produce streams with better
hardware decoder compatibility
${toggle_vp8} VP8 codec support
${toggle_vp9} VP9 codec support
${toggle_vp10} VP10 codec support
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_postproc} postprocessing
@@ -195,8 +193,6 @@ if [ ${doxy_major:-0} -ge 1 ]; then
fi
# disable codecs when their source directory does not exist
[ -d "${source_path}/vp8" ] || disable_feature vp8
[ -d "${source_path}/vp9" ] || disable_feature vp9
[ -d "${source_path}/vp10" ] || disable_feature vp10
# disable vp10 codec by default
@@ -218,16 +214,10 @@ enable_feature os_support
enable_feature temporal_denoising
CODECS="
vp8_encoder
vp8_decoder
vp9_encoder
vp9_decoder
vp10_encoder
vp10_decoder
"
CODEC_FAMILIES="
vp8
vp9
vp10
"

View File

@@ -1,190 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "alloccommon.h"
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
#include "onyxc_int.h"
#include "findnearmv.h"
#include "entropymode.h"
#include "systemdependent.h"
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
{
int i;
for (i = 0; i < NUM_YV12_BUFFERS; i++)
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
#if CONFIG_POSTPROC
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
if (oci->post_proc_buffer_int_used)
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
vpx_free(oci->pp_limits_buffer);
oci->pp_limits_buffer = NULL;
#endif
vpx_free(oci->above_context);
vpx_free(oci->mip);
#if CONFIG_ERROR_CONCEALMENT
vpx_free(oci->prev_mip);
oci->prev_mip = NULL;
#endif
oci->above_context = NULL;
oci->mip = NULL;
}
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
{
int i;
vp8_de_alloc_frame_buffers(oci);
/* our internal buffers are always multiples of 16 */
if ((width & 0xf) != 0)
width += 16 - (width & 0xf);
if ((height & 0xf) != 0)
height += 16 - (height & 0xf);
for (i = 0; i < NUM_YV12_BUFFERS; i++)
{
oci->fb_idx_ref_cnt[i] = 0;
oci->yv12_fb[i].flags = 0;
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
goto allocation_fail;
}
oci->new_fb_idx = 0;
oci->lst_fb_idx = 1;
oci->gld_fb_idx = 2;
oci->alt_fb_idx = 3;
oci->fb_idx_ref_cnt[0] = 1;
oci->fb_idx_ref_cnt[1] = 1;
oci->fb_idx_ref_cnt[2] = 1;
oci->fb_idx_ref_cnt[3] = 1;
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
goto allocation_fail;
oci->mb_rows = height >> 4;
oci->mb_cols = width >> 4;
oci->MBs = oci->mb_rows * oci->mb_cols;
oci->mode_info_stride = oci->mb_cols + 1;
oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
if (!oci->mip)
goto allocation_fail;
oci->mi = oci->mip + oci->mode_info_stride + 1;
/* Allocation of previous mode info will be done in vp8_decode_frame()
* as it is decoder-only data */
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
if (!oci->above_context)
goto allocation_fail;
#if CONFIG_POSTPROC
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
goto allocation_fail;
oci->post_proc_buffer_int_used = 0;
memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
memset(oci->post_proc_buffer.buffer_alloc, 128,
oci->post_proc_buffer.frame_size);
/* Allocate buffer to store post-processing filter coefficients.
*
* Note: Round up mb_cols to support SIMD reads
*/
oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
if (!oci->pp_limits_buffer)
goto allocation_fail;
#endif
return 0;
allocation_fail:
vp8_de_alloc_frame_buffers(oci);
return 1;
}
void vp8_setup_version(VP8_COMMON *cm)
{
switch (cm->version)
{
case 0:
cm->no_lpf = 0;
cm->filter_type = NORMAL_LOOPFILTER;
cm->use_bilinear_mc_filter = 0;
cm->full_pixel = 0;
break;
case 1:
cm->no_lpf = 0;
cm->filter_type = SIMPLE_LOOPFILTER;
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 0;
break;
case 2:
cm->no_lpf = 1;
cm->filter_type = NORMAL_LOOPFILTER;
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 0;
break;
case 3:
cm->no_lpf = 1;
cm->filter_type = SIMPLE_LOOPFILTER;
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 1;
break;
default:
/*4,5,6,7 are reserved for future use*/
cm->no_lpf = 0;
cm->filter_type = NORMAL_LOOPFILTER;
cm->use_bilinear_mc_filter = 0;
cm->full_pixel = 0;
break;
}
}
void vp8_create_common(VP8_COMMON *oci)
{
vp8_machine_specific_config(oci);
vp8_init_mbmode_probs(oci);
vp8_default_bmode_probs(oci->fc.bmode_prob);
oci->mb_no_coeff_skip = 1;
oci->no_lpf = 0;
oci->filter_type = NORMAL_LOOPFILTER;
oci->use_bilinear_mc_filter = 0;
oci->full_pixel = 0;
oci->multi_token_partition = ONE_PARTITION;
oci->clamp_type = RECON_CLAMP_REQUIRED;
/* Initialize reference frame sign bias structure to defaults */
memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
/* Default disable buffer to buffer copying */
oci->copy_buffer_to_gf = 0;
oci->copy_buffer_to_arf = 0;
}
void vp8_remove_common(VP8_COMMON *oci)
{
vp8_de_alloc_frame_buffers(oci);
}
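For reference, the width/height rounding at the top of vp8_alloc_frame_buffers above is a plain align-up to the next multiple of 16 (one macroblock). A minimal standalone sketch of the same arithmetic, assuming only standard C (helper name is hypothetical):

#include <assert.h>

/* Round v up to the next multiple of 16 (one macroblock). */
static int align_up_16(int v)
{
    if ((v & 0xf) != 0)
        v += 16 - (v & 0xf);
    return v;
}

int main(void)
{
    assert(align_up_16(176) == 176); /* already aligned */
    assert(align_up_16(177) == 192); /* rounded up */
    return 0;
}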

View File

@@ -1,31 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ALLOCCOMMON_H_
#define VP8_COMMON_ALLOCCOMMON_H_
#include "onyxc_int.h"
#ifdef __cplusplus
extern "C" {
#endif
void vp8_create_common(VP8_COMMON *oci);
void vp8_remove_common(VP8_COMMON *oci);
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
void vp8_setup_version(VP8_COMMON *oci);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_ALLOCCOMMON_H_

View File

@@ -1,237 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_filter_block2d_bil_first_pass_armv6|
EXPORT |vp8_filter_block2d_bil_second_pass_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr,
; r1 unsigned short *dst_ptr,
; r2 unsigned int src_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *vp8_filter
;-------------------------------------
; The output is transposed and stored in the output array to make second-pass filtering easier.
|vp8_filter_block2d_bil_first_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r4, [sp, #36] ; width
mov r12, r3 ; outer-loop counter
add r7, r2, r4 ; preload next row
pld [r0, r7]
sub r2, r2, r4 ; src increment for height loop
ldr r5, [r11] ; load up filter coefficients
mov r3, r3, lsl #1 ; height*2
add r3, r3, #2 ; plus 2 to make output buffer 4-byte aligned since height is actually (height+1)
mov r11, r1 ; save dst_ptr for each row
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_1st_filter
|bil_height_loop_1st_v6|
ldrb r6, [r0] ; load source data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
mov lr, r4, lsr #2 ; 4-in-parallel loop counter
|bil_width_loop_1st_v6|
ldrb r9, [r0, #3]
ldrb r10, [r0, #4]
pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0]
pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
smuad r6, r6, r5 ; apply the filter
pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
smuad r7, r7, r5
pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
smuad r8, r8, r5
smuad r9, r9, r5
add r0, r0, #4
subs lr, lr, #1
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #16, r6, asr #7
usat r7, #16, r7, asr #7
strh r6, [r1], r3 ; result is transposed and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strh r7, [r1], r3
add r9, r9, #0x40
usat r8, #16, r8, asr #7
usat r9, #16, r9, asr #7
strh r8, [r1], r3 ; result is transposed and stored
ldrneb r6, [r0] ; load source data
strh r9, [r1], r3
ldrneb r7, [r0, #1]
ldrneb r8, [r0, #2]
bne bil_width_loop_1st_v6
add r0, r0, r2 ; move to next input row
subs r12, r12, #1
add r9, r2, r4, lsl #1 ; adding back block width
pld [r0, r9] ; preload next row
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_1st_v6
ldmia sp!, {r4 - r11, pc}
|bil_null_1st_filter|
|bil_height_loop_null_1st|
mov lr, r4, lsr #2 ; loop counter
|bil_width_loop_null_1st|
ldrb r6, [r0] ; load data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
ldrb r9, [r0, #3]
strh r6, [r1], r3 ; store it to immediate buffer
add r0, r0, #4
strh r7, [r1], r3
subs lr, lr, #1
strh r8, [r1], r3
strh r9, [r1], r3
bne bil_width_loop_null_1st
subs r12, r12, #1
add r0, r0, r2 ; move to next input line
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_null_1st
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_bil_first_pass_armv6|
;---------------------------------
; r0 unsigned short *src_ptr,
; r1 unsigned char *dst_ptr,
; r2 int dst_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *vp8_filter
;---------------------------------
|vp8_filter_block2d_bil_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r4, [sp, #36] ; width
ldr r5, [r11] ; load up filter coefficients
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
mov r11, r1
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_2nd_filter
|bil_height_loop_2nd|
ldr r6, [r0] ; load the data
ldr r8, [r0, #4]
ldrh r10, [r0, #8]
mov lr, r3, lsr #2 ; loop counter
|bil_width_loop_2nd|
pkhtb r7, r6, r8 ; src[1] | src[2]
pkhtb r9, r8, r10 ; src[3] | src[4]
smuad r6, r6, r5 ; apply filter
smuad r8, r8, r5 ; apply filter
subs lr, lr, #1
smuadx r7, r7, r5 ; apply filter
smuadx r9, r9, r5 ; apply filter
add r0, r0, #8
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #8, r6, asr #7
usat r7, #8, r7, asr #7
strb r6, [r1], r2 ; the result is transposed back and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strb r7, [r1], r2
add r9, r9, #0x40
usat r8, #8, r8, asr #7
usat r9, #8, r9, asr #7
strb r8, [r1], r2 ; the result is transposed back and stored
ldrne r6, [r0] ; load data
strb r9, [r1], r2
ldrne r8, [r0, #4]
ldrneh r10, [r0, #8]
bne bil_width_loop_2nd
subs r12, r12, #1
add r0, r0, #4 ; update src for next row
add r11, r11, #1
mov r1, r11
bne bil_height_loop_2nd
ldmia sp!, {r4 - r11, pc}
|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
mov lr, r3, lsr #2
|bil_width_loop_null_2nd|
ldr r6, [r0], #4 ; load data
subs lr, lr, #1
ldr r8, [r0], #4
strb r6, [r1], r2 ; store data
mov r7, r6, lsr #16
strb r7, [r1], r2
mov r9, r8, lsr #16
strb r8, [r1], r2
strb r9, [r1], r2
bne bil_width_loop_null_2nd
subs r12, r12, #1
add r0, r0, #4
add r11, r11, #1
mov r1, r11
bne bil_height_loop_null_2nd
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_bil_second_pass_armv6|
END
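The first pass above boils down to a 2-tap filter per output sample whose coefficients sum to 128, matching the smuad / add #0x40 / usat #16, asr #7 sequence. A scalar C sketch of that per-pixel math (the function name is hypothetical; the real code keeps a 16-bit intermediate between passes):

/* One first-pass output sample of the bilinear filter. */
static unsigned short bil_first_pass_pixel(const unsigned char *src,
                                           const short *vp8_filter)
{
    int sum = src[0] * vp8_filter[0] + src[1] * vp8_filter[1];
    sum = (sum + 64) >> 7;          /* round_shift_and_clamp */
    if (sum > 65535) sum = 65535;   /* usat #16 */
    return (unsigned short)(sum < 0 ? 0 : sum);
}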

View File

@@ -1,186 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem16x16_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem16x16_v6| PROC
stmdb sp!, {r4 - r7}
;push {r4-r7}
;preload
pld [r0, #31] ; preload for next 16x16 block
ands r4, r0, #15
beq copy_mem16x16_fast
ands r4, r0, #7
beq copy_mem16x16_8
ands r4, r0, #3
beq copy_mem16x16_4
;copy one byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
ldrb r6, [r0, #2]
ldrb r7, [r0, #3]
mov r12, #16
copy_mem16x16_1_loop
strb r4, [r2]
strb r5, [r2, #1]
strb r6, [r2, #2]
strb r7, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
ldrb r6, [r0, #6]
ldrb r7, [r0, #7]
subs r12, r12, #1
strb r4, [r2, #4]
strb r5, [r2, #5]
strb r6, [r2, #6]
strb r7, [r2, #7]
ldrb r4, [r0, #8]
ldrb r5, [r0, #9]
ldrb r6, [r0, #10]
ldrb r7, [r0, #11]
strb r4, [r2, #8]
strb r5, [r2, #9]
strb r6, [r2, #10]
strb r7, [r2, #11]
ldrb r4, [r0, #12]
ldrb r5, [r0, #13]
ldrb r6, [r0, #14]
ldrb r7, [r0, #15]
add r0, r0, r1
strb r4, [r2, #12]
strb r5, [r2, #13]
strb r6, [r2, #14]
strb r7, [r2, #15]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
ldrneb r6, [r0, #2]
ldrneb r7, [r0, #3]
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_1_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 4 bytes each time
copy_mem16x16_4
ldr r4, [r0]
ldr r5, [r0, #4]
ldr r6, [r0, #8]
ldr r7, [r0, #12]
mov r12, #16
copy_mem16x16_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
str r6, [r2, #8]
str r7, [r2, #12]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
ldrne r6, [r0, #8]
ldrne r7, [r0, #12]
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_4_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 8 bytes each time
copy_mem16x16_8
sub r1, r1, #16
sub r3, r3, #16
mov r12, #16
copy_mem16x16_8_loop
ldmia r0!, {r4-r5}
;ldm r0, {r4-r5}
ldmia r0!, {r6-r7}
add r0, r0, r1
stmia r2!, {r4-r5}
subs r12, r12, #1
;stm r2, {r4-r5}
stmia r2!, {r6-r7}
add r2, r2, r3
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_8_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 16 bytes each time
copy_mem16x16_fast
;sub r1, r1, #16
;sub r3, r3, #16
mov r12, #16
copy_mem16x16_fast_loop
ldmia r0, {r4-r7}
;ldm r0, {r4-r7}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r7}
;stm r2, {r4-r7}
add r2, r2, r3
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_fast_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
ENDP ; |vp8_copy_mem16x16_v6|
END
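The routine above is a 16x16 block copy that dispatches on source alignment (16-, 8-, 4-, and 1-byte paths). Functionally it is equivalent to this plain C sketch, where memcpy stands in for the alignment handling:

#include <string.h>

/* Row-by-row 16x16 block copy. */
static void copy_mem16x16(const unsigned char *src, int src_stride,
                          unsigned char *dst, int dst_stride)
{
    int r;
    for (r = 0; r < 16; r++) {
        memcpy(dst, src, 16);
        src += src_stride;
        dst += dst_stride;
    }
}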

View File

@@ -1,128 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem8x4_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void vp8_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem8x4_v6| PROC
;push {r4-r5}
stmdb sp!, {r4-r5}
;preload
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
ands r4, r0, #7
beq copy_mem8x4_fast
ands r4, r0, #3
beq copy_mem8x4_4
;copy 1 byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
mov r12, #4
copy_mem8x4_1_loop
strb r4, [r2]
strb r5, [r2, #1]
ldrb r4, [r0, #2]
ldrb r5, [r0, #3]
subs r12, r12, #1
strb r4, [r2, #2]
strb r5, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
strb r4, [r2, #4]
strb r5, [r2, #5]
ldrb r4, [r0, #6]
ldrb r5, [r0, #7]
add r0, r0, r1
strb r4, [r2, #6]
strb r5, [r2, #7]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
bne copy_mem8x4_1_loop
ldmia sp!, {r4 - r5}
;pop {r4-r5}
mov pc, lr
;copy 4 bytes each time
copy_mem8x4_4
ldr r4, [r0]
ldr r5, [r0, #4]
mov r12, #4
copy_mem8x4_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
bne copy_mem8x4_4_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
;copy 8 bytes each time
copy_mem8x4_fast
;sub r1, r1, #8
;sub r3, r3, #8
mov r12, #4
copy_mem8x4_fast_loop
ldmia r0, {r4-r5}
;ldm r0, {r4-r5}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r5}
;stm r2, {r4-r5}
add r2, r2, r3
bne copy_mem8x4_fast_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
ENDP ; |vp8_copy_mem8x4_v6|
END

View File

@@ -1,128 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem8x8_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem8x8_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem8x8_v6| PROC
;push {r4-r5}
stmdb sp!, {r4-r5}
;preload
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
ands r4, r0, #7
beq copy_mem8x8_fast
ands r4, r0, #3
beq copy_mem8x8_4
;copy 1 byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
mov r12, #8
copy_mem8x8_1_loop
strb r4, [r2]
strb r5, [r2, #1]
ldrb r4, [r0, #2]
ldrb r5, [r0, #3]
subs r12, r12, #1
strb r4, [r2, #2]
strb r5, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
strb r4, [r2, #4]
strb r5, [r2, #5]
ldrb r4, [r0, #6]
ldrb r5, [r0, #7]
add r0, r0, r1
strb r4, [r2, #6]
strb r5, [r2, #7]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
bne copy_mem8x8_1_loop
ldmia sp!, {r4 - r5}
;pop {r4-r5}
mov pc, lr
;copy 4 bytes each time
copy_mem8x8_4
ldr r4, [r0]
ldr r5, [r0, #4]
mov r12, #8
copy_mem8x8_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
bne copy_mem8x8_4_loop
ldmia sp!, {r4 - r5}
;pop {r4-r5}
mov pc, lr
;copy 8 bytes each time
copy_mem8x8_fast
;sub r1, r1, #8
;sub r3, r3, #8
mov r12, #8
copy_mem8x8_fast_loop
ldmia r0, {r4-r5}
;ldm r0, {r4-r5}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r5}
;stm r2, {r4-r5}
add r2, r2, r3
bne copy_mem8x8_fast_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
ENDP ; |vp8_copy_mem8x8_v6|
END

View File

@@ -1,70 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp8_dc_only_idct_add_v6|
AREA |.text|, CODE, READONLY
;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
; int pred_stride, unsigned char *dst_ptr,
; int dst_stride)
; r0 input_dc
; r1 pred_ptr
; r2 pred_stride
; r3 dst_ptr
; sp dst_stride
|vp8_dc_only_idct_add_v6| PROC
stmdb sp!, {r4 - r7}
add r0, r0, #4 ; input_dc += 4
ldr r12, c0x0000FFFF
ldr r4, [r1], r2
and r0, r12, r0, asr #3 ; a1 = ((input_dc+4) >> 3) & 0xFFFF
ldr r6, [r1], r2
orr r0, r0, r0, lsl #16 ; a1 | a1
ldr r12, [sp, #16] ; dst stride
uxtab16 r5, r0, r4 ; a1+2 | a1+0
uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1
uxtab16 r7, r0, r6
uxtab16 r6, r0, r6, ror #8
usat16 r5, #8, r5
usat16 r4, #8, r4
usat16 r7, #8, r7
usat16 r6, #8, r6
orr r5, r5, r4, lsl #8
orr r7, r7, r6, lsl #8
ldr r4, [r1], r2
str r5, [r3], r12
ldr r6, [r1]
str r7, [r3], r12
uxtab16 r5, r0, r4
uxtab16 r4, r0, r4, ror #8
uxtab16 r7, r0, r6
uxtab16 r6, r0, r6, ror #8
usat16 r5, #8, r5
usat16 r4, #8, r4
usat16 r7, #8, r7
usat16 r6, #8, r6
orr r5, r5, r4, lsl #8
orr r7, r7, r6, lsl #8
str r5, [r3], r12
str r7, [r3]
ldmia sp!, {r4 - r7}
bx lr
ENDP ; |vp8_dc_only_idct_add_v6|
; Constant Pool
c0x0000FFFF DCD 0x0000FFFF
END
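The whole routine above adds a single DC term to every predicted pixel of the 4x4 block; the uxtab16/usat16 pairs just do that four pixels at a time. A scalar sketch of the same operation:

/* Every pixel gets the same DC term a1 = (input_dc + 4) >> 3,
 * added to the prediction and clamped to [0, 255]. */
static void dc_only_idct_add(short input_dc, const unsigned char *pred,
                             int pred_stride, unsigned char *dst,
                             int dst_stride)
{
    int a1 = (input_dc + 4) >> 3;
    int r, c, v;
    for (r = 0; r < 4; r++) {
        for (c = 0; c < 4; c++) {
            v = pred[c] + a1;
            dst[c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }
        pred += pred_stride;
        dst += dst_stride;
    }
}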

View File

@@ -1,190 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp8_dequant_idct_add_v6|
AREA |.text|, CODE, READONLY
;void vp8_dequant_idct_add_v6(short *input, short *dq,
; unsigned char *dest, int stride)
; r0 = q
; r1 = dq
; r2 = dst
; r3 = stride
|vp8_dequant_idct_add_v6| PROC
stmdb sp!, {r4-r11, lr}
ldr r4, [r0] ;input
ldr r5, [r1], #4 ;dq
sub sp, sp, #4
str r3, [sp]
mov r12, #4
vp8_dequant_add_loop
smulbb r6, r4, r5
smultt r7, r4, r5
ldr r4, [r0, #4] ;input
ldr r5, [r1], #4 ;dq
strh r6, [r0], #2
strh r7, [r0], #2
smulbb r6, r4, r5
smultt r7, r4, r5
subs r12, r12, #1
ldrne r4, [r0, #4]
ldrne r5, [r1], #4
strh r6, [r0], #2
strh r7, [r0], #2
bne vp8_dequant_add_loop
sub r0, r0, #32
mov r1, r0
; short_idct4x4llm_v6_dual
ldr r3, cospi8sqrt2minus1
ldr r4, sinpi8sqrt2
ldr r6, [r0, #8]
mov r5, #2
vp8_dequant_idct_loop1_v6
ldr r12, [r0, #24]
ldr r14, [r0, #16]
smulwt r9, r3, r6
smulwb r7, r3, r6
smulwt r10, r4, r6
smulwb r8, r4, r6
pkhbt r7, r7, r9, lsl #16
smulwt r11, r3, r12
pkhbt r8, r8, r10, lsl #16
uadd16 r6, r6, r7
smulwt r7, r4, r12
smulwb r9, r3, r12
smulwb r10, r4, r12
subs r5, r5, #1
pkhbt r9, r9, r11, lsl #16
ldr r11, [r0], #4
pkhbt r10, r10, r7, lsl #16
uadd16 r7, r12, r9
usub16 r7, r8, r7
uadd16 r6, r6, r10
uadd16 r10, r11, r14
usub16 r8, r11, r14
uadd16 r9, r10, r6
usub16 r10, r10, r6
uadd16 r6, r8, r7
usub16 r7, r8, r7
str r6, [r1, #8]
ldrne r6, [r0, #8]
str r7, [r1, #16]
str r10, [r1, #24]
str r9, [r1], #4
bne vp8_dequant_idct_loop1_v6
mov r5, #2
sub r0, r1, #8
vp8_dequant_idct_loop2_v6
ldr r6, [r0], #4
ldr r7, [r0], #4
ldr r8, [r0], #4
ldr r9, [r0], #4
smulwt r1, r3, r6
smulwt r12, r4, r6
smulwt lr, r3, r8
smulwt r10, r4, r8
pkhbt r11, r8, r6, lsl #16
pkhbt r1, lr, r1, lsl #16
pkhbt r12, r10, r12, lsl #16
pkhtb r6, r6, r8, asr #16
uadd16 r6, r1, r6
pkhbt lr, r9, r7, lsl #16
uadd16 r10, r11, lr
usub16 lr, r11, lr
pkhtb r8, r7, r9, asr #16
subs r5, r5, #1
smulwt r1, r3, r8
smulwb r7, r3, r8
smulwt r11, r4, r8
smulwb r9, r4, r8
pkhbt r1, r7, r1, lsl #16
uadd16 r8, r1, r8
pkhbt r11, r9, r11, lsl #16
usub16 r1, r12, r8
uadd16 r8, r11, r6
ldr r9, c0x00040004
ldr r12, [sp] ; get stride from stack
uadd16 r6, r10, r8
usub16 r7, r10, r8
uadd16 r7, r7, r9
uadd16 r6, r6, r9
uadd16 r10, r14, r1
usub16 r1, r14, r1
uadd16 r10, r10, r9
uadd16 r1, r1, r9
ldr r11, [r2] ; load input from dst
mov r8, r7, asr #3
pkhtb r9, r8, r10, asr #19
mov r8, r1, asr #3
pkhtb r8, r8, r6, asr #19
uxtb16 lr, r11, ror #8
qadd16 r9, r9, lr
uxtb16 lr, r11
qadd16 r8, r8, lr
usat16 r9, #8, r9
usat16 r8, #8, r8
orr r9, r8, r9, lsl #8
ldr r11, [r2, r12] ; load input from dst
mov r7, r7, lsl #16
mov r1, r1, lsl #16
mov r10, r10, lsl #16
mov r6, r6, lsl #16
mov r7, r7, asr #3
pkhtb r7, r7, r10, asr #19
mov r1, r1, asr #3
pkhtb r1, r1, r6, asr #19
uxtb16 r8, r11, ror #8
qadd16 r7, r7, r8
uxtb16 r8, r11
qadd16 r1, r1, r8
usat16 r7, #8, r7
usat16 r1, #8, r1
orr r1, r1, r7, lsl #8
str r9, [r2], r12 ; store output to dst
str r1, [r2], r12 ; store output to dst
bne vp8_dequant_idct_loop2_v6
; memset
sub r0, r0, #32
add sp, sp, #4
mov r12, #0
str r12, [r0]
str r12, [r0, #4]
str r12, [r0, #8]
str r12, [r0, #12]
str r12, [r0, #16]
str r12, [r0, #20]
str r12, [r0, #24]
str r12, [r0, #28]
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_dequant_idct_add_v6|
; Constant Pool
cospi8sqrt2minus1 DCD 0x00004E7B
sinpi8sqrt2 DCD 0x00008A8C
c0x00040004 DCD 0x00040004
END

View File

@@ -1,69 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_dequantize_b_loop_v6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------
;void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
; r0 short *Q,
; r1 short *DQC
; r2 short *DQ
|vp8_dequantize_b_loop_v6| PROC
stmdb sp!, {r4-r9, lr}
ldr r3, [r0] ;load Q
ldr r4, [r1] ;load DQC
ldr r5, [r0, #4]
ldr r6, [r1, #4]
mov r12, #2 ;loop counter
dequant_loop
smulbb r7, r3, r4 ;multiply
smultt r8, r3, r4
smulbb r9, r5, r6
smultt lr, r5, r6
ldr r3, [r0, #8]
ldr r4, [r1, #8]
ldr r5, [r0, #12]
ldr r6, [r1, #12]
strh r7, [r2], #2 ;store result
smulbb r7, r3, r4 ;multiply
strh r8, [r2], #2
smultt r8, r3, r4
strh r9, [r2], #2
smulbb r9, r5, r6
strh lr, [r2], #2
smultt lr, r5, r6
subs r12, r12, #1
add r0, r0, #16
add r1, r1, #16
ldrne r3, [r0]
strh r7, [r2], #2 ;store result
ldrne r4, [r1]
strh r8, [r2], #2
ldrne r5, [r0, #4]
strh r9, [r2], #2
ldrne r6, [r1, #4]
strh lr, [r2], #2
bne dequant_loop
ldmia sp!, {r4-r9, pc}
ENDP ;|vp8_dequantize_b_loop_v6|
END
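Despite the interleaving for pipeline reasons, the loop above is element-wise: each of the 16 quantized coefficients of a 4x4 block is multiplied by its dequantization factor. The equivalent C is simply:

/* Element-wise dequantization: DQ[i] = Q[i] * DQC[i]. */
static void dequantize_b_loop(const short *Q, const short *DQC, short *DQ)
{
    int i;
    for (i = 0; i < 16; i++)
        DQ[i] = Q[i] * DQC[i];
}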

View File

@@ -1,624 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_filter_block2d_first_pass_armv6|
EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|
EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|
EXPORT |vp8_filter_block2d_second_pass_armv6|
EXPORT |vp8_filter4_block2d_second_pass_armv6|
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
EXPORT |vp8_filter_block2d_second_pass_only_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr
; r1 short *output_ptr
; r2 unsigned int src_pixels_per_line
; r3 unsigned int output_width
; stack unsigned int output_height
; stack const short *vp8_filter
;-------------------------------------
; Filter the input and put the result in the output array. Applies the 6-tap FIR
; filter with the output being a 2-byte value and the input being a 1-byte value.
|vp8_filter_block2d_first_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r7, [sp, #36] ; output height
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
add r12, r3, #16 ; square off the output
sub sp, sp, #4
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r1, [sp] ; push destination to stack
mov r7, r7, lsl #16 ; height is top part of counter
; six tap filter
|height_loop_1st_6|
ldrb r8, [r0, #-2] ; load source data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
orr r7, r7, r3, lsr #2 ; construct loop counter
|width_loop_1st_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
smuad lr, lr, r4 ; apply the filter
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
sub r7, r7, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r11, r10, r6, r8
ands r10, r7, #0xff ; test loop counter
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
add r11, r11, #0x40
ldrneb r9, [r0, #-1]
usat r11, #8, r11, asr #7
strh lr, [r1], r12 ; result is transposed and stored, which
; will make second pass filtering easier.
ldrneb r10, [r0], #2
strh r11, [r1], r12
bne width_loop_1st_6
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
add r1, r1, #2 ; move over to next column
str r1, [sp]
bne height_loop_1st_6
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
; --------------------------
; 16x16 version
; -----------------------------
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r7, [sp, #36] ; output height
add r4, r2, #18 ; preload next row
pld [r0, r4]
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
add r12, r3, #16 ; square off the output
sub sp, sp, #4
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r1, [sp] ; push destination to stack
mov r7, r7, lsl #16 ; height is top part of counter
; six tap filter
|height_loop_1st_16_6|
ldrb r8, [r0, #-2] ; load source data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
orr r7, r7, r3, lsr #2 ; construct loop counter
|width_loop_1st_16_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
smuad lr, lr, r4 ; apply the filter
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
sub r7, r7, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r11, r10, r6, r8
ands r10, r7, #0xff ; test loop counter
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
add r11, r11, #0x40
ldrneb r9, [r0, #-1]
usat r11, #8, r11, asr #7
strh lr, [r1], r12 ; result is transposed and stored, which
; will make second pass filtering easier.
ldrneb r10, [r0], #2
strh r11, [r1], r12
bne width_loop_1st_16_6
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
add r11, r2, #34 ; adding back block width(=16)
pld [r0, r11] ; preload next row
add r1, r1, #2 ; move over to next column
str r1, [sp]
bne height_loop_1st_16_6
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
; --------------------------
; 8x8 version
; -----------------------------
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r7, [sp, #36] ; output height
add r4, r2, #10 ; preload next row
pld [r0, r4]
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
add r12, r3, #16 ; square off the output
sub sp, sp, #4
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r1, [sp] ; push destination to stack
mov r7, r7, lsl #16 ; height is top part of counter
; six tap filter
|height_loop_1st_8_6|
ldrb r8, [r0, #-2] ; load source data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
orr r7, r7, r3, lsr #2 ; construct loop counter
|width_loop_1st_8_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
smuad lr, lr, r4 ; apply the filter
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
sub r7, r7, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r11, r10, r6, r8
ands r10, r7, #0xff ; test loop counter
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
add r11, r11, #0x40
ldrneb r9, [r0, #-1]
usat r11, #8, r11, asr #7
strh lr, [r1], r12 ; result is transposed and stored, which
; will make second pass filtering easier.
ldrneb r10, [r0], #2
strh r11, [r1], r12
bne width_loop_1st_8_6
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
add r11, r2, #18 ; adding back block width(=8)
pld [r0, r11] ; preload next row
add r1, r1, #2 ; move over to next column
str r1, [sp]
bne height_loop_1st_8_6
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
;---------------------------------
; r0 short *src_ptr,
; r1 unsigned char *output_ptr,
; r2 unsigned int output_pitch,
; r3 unsigned int cnt,
; stack const short *vp8_filter
;---------------------------------
|vp8_filter_block2d_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #36] ; vp8_filter address
sub sp, sp, #4
mov r7, r3, lsl #16 ; height is top part of counter
str r1, [sp] ; push destination to stack
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
pkhbt r12, r5, r4 ; pack the filter differently
pkhbt r11, r6, r5
sub r0, r0, #4 ; offset input buffer
|height_loop_2nd|
ldr r8, [r0] ; load the data
ldr r9, [r0, #4]
orr r7, r7, r3, lsr #1 ; loop counter
|width_loop_2nd|
smuad lr, r4, r8 ; apply filter
sub r7, r7, #1
smulbt r8, r4, r8
ldr r10, [r0, #8]
smlad lr, r5, r9, lr
smladx r8, r12, r9, r8
ldrh r9, [r0, #12]
smlad lr, r6, r10, lr
smladx r8, r11, r10, r8
add r0, r0, #4
smlatb r10, r6, r9, r8
add lr, lr, #0x40 ; round_shift_and_clamp
ands r8, r7, #0xff
usat lr, #8, lr, asr #7
add r10, r10, #0x40
strb lr, [r1], r2 ; the result is transposed back and stored
usat r10, #8, r10, asr #7
ldrne r8, [r0] ; load data for next loop
ldrne r9, [r0, #4]
strb r10, [r1], r2
bne width_loop_2nd
ldr r1, [sp] ; update dst for next loop
subs r7, r7, #0x10000
add r0, r0, #16 ; update src for next loop
add r1, r1, #1
str r1, [sp]
bne height_loop_2nd
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
;---------------------------------
; r0 short *src_ptr,
; r1 unsigned char *output_ptr,
; r2 unsigned int output_pitch,
; r3 unsigned int cnt,
; stack const short *vp8_filter
;---------------------------------
|vp8_filter4_block2d_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #36] ; vp8_filter address
mov r7, r3, lsl #16 ; height is top part of counter
ldr r4, [r11] ; load up packed filter coefficients
add lr, r1, r3 ; save final destination pointer
ldr r5, [r11, #4]
ldr r6, [r11, #8]
pkhbt r12, r5, r4 ; pack the filter differently
pkhbt r11, r6, r5
mov r4, #0x40 ; rounding factor (for smlad{x})
|height_loop_2nd_4|
ldrd r8, r9, [r0, #-4] ; load the data
orr r7, r7, r3, lsr #1 ; loop counter
|width_loop_2nd_4|
ldr r10, [r0, #4]!
smladx r6, r9, r12, r4 ; apply filter
pkhbt r8, r9, r8
smlad r5, r8, r12, r4
pkhbt r8, r10, r9
smladx r6, r10, r11, r6
sub r7, r7, #1
smlad r5, r8, r11, r5
mov r8, r9 ; shift the data for the next loop
mov r9, r10
usat r6, #8, r6, asr #7 ; shift and clamp
usat r5, #8, r5, asr #7
strb r5, [r1], r2 ; the result is transposed back and stored
tst r7, #0xff
strb r6, [r1], r2
bne width_loop_2nd_4
subs r7, r7, #0x10000
add r0, r0, #16 ; update src for next loop
sub r1, lr, r7, lsr #16 ; update dst for next loop
bne height_loop_2nd_4
ldmia sp!, {r4 - r11, pc}
ENDP
;------------------------------------
; r0 unsigned char *src_ptr
; r1 unsigned char *output_ptr,
; r2 unsigned int src_pixels_per_line
; r3 unsigned int cnt,
; stack unsigned int output_pitch,
; stack const short *vp8_filter
;------------------------------------
|vp8_filter_block2d_first_pass_only_armv6| PROC
stmdb sp!, {r4 - r11, lr}
add r7, r2, r3 ; preload next row
add r7, r7, #2
pld [r0, r7]
ldr r4, [sp, #36] ; output pitch
ldr r11, [sp, #40] ; HFilter address
sub sp, sp, #8
mov r7, r3
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
sub r4, r4, r3
str r4, [sp] ; save modified output pitch
str r2, [sp, #4]
mov r2, #0x40
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
; six tap filter
|height_loop_1st_only_6|
ldrb r8, [r0, #-2] ; load data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
mov r12, r3, lsr #1 ; loop counter
|width_loop_1st_only_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
;; smuad lr, lr, r4
smlad lr, lr, r4, r2
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
;; smuad r8, r8, r4
smlad r8, r8, r4, r2
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
subs r12, r12, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r10, r10, r6, r8
;; add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
;; add r10, r10, #0x40
strb lr, [r1], #1 ; store the result
usat r10, #8, r10, asr #7
ldrneb r9, [r0, #-1]
strb r10, [r1], #1
ldrneb r10, [r0], #2
bne width_loop_1st_only_6
ldr lr, [sp] ; load back output pitch
ldr r12, [sp, #4] ; load back src increment
subs r7, r7, #1
add r0, r0, r12 ; update src for next loop
add r11, r12, r3 ; preload next row
add r11, r11, #2
pld [r0, r11]
add r1, r1, lr ; update dst for next loop
bne height_loop_1st_only_6
add sp, sp, #8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_first_pass_only_armv6|
;------------------------------------
; r0 unsigned char *src_ptr,
; r1 unsigned char *output_ptr,
; r2 unsigned int src_pixels_per_line
; r3 unsigned int cnt,
; stack unsigned int output_pitch,
; stack const short *vp8_filter
;------------------------------------
|vp8_filter_block2d_second_pass_only_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; VFilter address
ldr r12, [sp, #36] ; output pitch
mov r7, r3, lsl #16 ; height is top part of counter
sub r0, r0, r2, lsl #1 ; need 6 elements for filtering, 2 before, 3 after
sub sp, sp, #8
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r0, [sp] ; save r0 to stack
str r1, [sp, #4] ; save dst to stack
; six tap filter
|width_loop_2nd_only_6|
ldrb r8, [r0], r2 ; load data
orr r7, r7, r3 ; loop counter
ldrb r9, [r0], r2
ldrb r10, [r0], r2
|height_loop_2nd_only_6|
; filter the first column in this inner loop, then move to the next column.
ldrb r11, [r0], r2
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0], r2
smuad lr, lr, r4
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0], r2
smlad r8, r11, r5, r8
ldrb r11, [r0]
sub r7, r7, #2
sub r0, r0, r2, lsl #2
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r10, r10, r6, r8
ands r9, r7, #0xff
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0], r2 ; load data for next loop
usat lr, #8, lr, asr #7
add r10, r10, #0x40
strb lr, [r1], r12 ; store the result for the column
usat r10, #8, r10, asr #7
ldrneb r9, [r0], r2
strb r10, [r1], r12
ldrneb r10, [r0], r2
bne height_loop_2nd_only_6
ldr r0, [sp]
ldr r1, [sp, #4]
subs r7, r7, #0x10000
add r0, r0, #1 ; move to filter next column
str r0, [sp]
add r1, r1, #1
str r1, [sp, #4]
bne width_loop_2nd_only_6
add sp, sp, #8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_second_pass_only_armv6|
END
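All of the first/second-pass variants above evaluate the same 6-tap FIR per output sample: six products accumulated (smuad/smlad), rounded by 64, shifted right by 7, and saturated. A scalar sketch of one output pixel (function name is hypothetical):

/* One output pixel of the 6-tap FIR over src[-2..3]. */
static unsigned char sixtap_pixel(const unsigned char *src,
                                  const short *vp8_filter)
{
    int sum = 0, k;
    for (k = -2; k <= 3; k++)
        sum += src[k] * vp8_filter[k + 2];
    sum = (sum + 64) >> 7;      /* round_shift_and_clamp */
    if (sum < 0) sum = 0;
    if (sum > 255) sum = 255;
    return (unsigned char)sum;
}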

View File

@@ -1,115 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
unsigned char *dst,
int stride, char *eobs)
{
int i;
for (i = 0; i < 4; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_v6 (q, dq, dst, stride);
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dst, stride, dst, stride);
((int *)q)[0] = 0;
}
if (eobs[1] > 1)
vp8_dequant_idct_add_v6 (q+16, dq, dst+4, stride);
else if (eobs[1] == 1)
{
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dst+4, stride, dst+4, stride);
((int *)(q+16))[0] = 0;
}
if (eobs[2] > 1)
vp8_dequant_idct_add_v6 (q+32, dq, dst+8, stride);
else if (eobs[2] == 1)
{
vp8_dc_only_idct_add_v6 (q[32]*dq[0], dst+8, stride, dst+8, stride);
((int *)(q+32))[0] = 0;
}
if (eobs[3] > 1)
vp8_dequant_idct_add_v6 (q+48, dq, dst+12, stride);
else if (eobs[3] == 1)
{
vp8_dc_only_idct_add_v6 (q[48]*dq[0], dst+12, stride,dst+12,stride);
((int *)(q+48))[0] = 0;
}
q += 64;
dst += 4*stride;
eobs += 4;
}
}
void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq,
unsigned char *dstu,
unsigned char *dstv,
int stride, char *eobs)
{
int i;
for (i = 0; i < 2; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_v6 (q, dq, dstu, stride);
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstu, stride, dstu, stride);
((int *)q)[0] = 0;
}
if (eobs[1] > 1)
vp8_dequant_idct_add_v6 (q+16, dq, dstu+4, stride);
else if (eobs[1] == 1)
{
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstu+4, stride,
dstu+4, stride);
((int *)(q+16))[0] = 0;
}
q += 32;
dstu += 4*stride;
eobs += 2;
}
for (i = 0; i < 2; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_v6 (q, dq, dstv, stride);
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstv, stride, dstv, stride);
((int *)q)[0] = 0;
}
if (eobs[1] > 1)
vp8_dequant_idct_add_v6 (q+16, dq, dstv+4, stride);
else if (eobs[1] == 1)
{
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstv+4, stride,
dstv+4, stride);
((int *)(q+16))[0] = 0;
}
q += 32;
dstv += 4*stride;
eobs += 2;
}
}

View File

@@ -1,202 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_idct4x4llm_v6_dual|
AREA |.text|, CODE, READONLY
; void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
; unsigned char *dst, int stride)
; r0 short* input
; r1 unsigned char* pred
; r2 int pitch
; r3 unsigned char* dst
; sp int stride
|vp8_short_idct4x4llm_v6_dual| PROC
stmdb sp!, {r4-r11, lr}
sub sp, sp, #4
mov r4, #0x00008A00 ; sin
orr r4, r4, #0x0000008C ; sinpi8sqrt2
mov r5, #0x00004E00 ; cos
orr r5, r5, #0x0000007B ; cospi8sqrt2minus1
orr r5, r5, #1<<31 ; loop counter on top bit
loop1_dual
ldr r6, [r0, #(4*2)] ; i5 | i4
ldr r12, [r0, #(12*2)] ; i13|i12
ldr r14, [r0, #(8*2)] ; i9 | i8
smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
smulbb r7, r5, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16
smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16
smulbt r11, r5, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16
pkhtb r7, r9, r7, asr #16 ; 5c | 4c
pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
uadd16 r6, r6, r7 ; 5c+5 | 4c+4
smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16
smulbb r9, r5, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16
smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16
subs r5, r5, #1<<31 ; i--
pkhtb r9, r11, r9, asr #16 ; 13c | 12c
ldr r11, [r0] ; i1 | i0
pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
uadd16 r7, r12, r9 ; 13c+13 | 12c+12
usub16 r7, r8, r7 ; c
uadd16 r6, r6, r10 ; d
uadd16 r10, r11, r14 ; a
usub16 r8, r11, r14 ; b
uadd16 r9, r10, r6 ; a+d
usub16 r10, r10, r6 ; a-d
uadd16 r6, r8, r7 ; b+c
usub16 r7, r8, r7 ; b-c
; use input buffer to store intermediate results
str r6, [r0, #(4*2)] ; o5 | o4
str r7, [r0, #(8*2)] ; o9 | o8
str r10,[r0, #(12*2)] ; o13|o12
str r9, [r0], #4 ; o1 | o0
bcs loop1_dual
sub r0, r0, #8 ; reset input/output
str r0, [sp]
loop2_dual
ldr r6, [r0, #(4*2)] ; i5 | i4
ldr r12,[r0, #(2*2)] ; i3 | i2
ldr r14,[r0, #(6*2)] ; i7 | i6
ldr r0, [r0, #(0*2)] ; i1 | i0
smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
smulbt r7, r5, r0 ; (ip[1] * cospi8sqrt2minus1) >> 16
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16
smulwt r8, r4, r0 ; (ip[1] * sinpi8sqrt2) >> 16
pkhbt r11, r6, r0, lsl #16 ; i0 | i4
pkhtb r7, r7, r9, asr #16 ; 1c | 5c
pkhtb r0, r0, r6, asr #16 ; i1 | i5
pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1
uadd16 r0, r7, r0 ; 1c+1 | 5c+5 = temp2
pkhbt r9, r14, r12, lsl #16 ; i2 | i6
uadd16 r10, r11, r9 ; a
usub16 r9, r11, r9 ; b
pkhtb r6, r12, r14, asr #16 ; i3 | i7
subs r5, r5, #1<<31 ; i--
smulbt r7, r5, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16
smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16
smulbb r12, r5, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16
smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16
pkhtb r7, r7, r12, asr #16 ; 3c | 7c
pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1
uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2
usub16 r12, r8, r6 ; c (o1 | o5)
uadd16 r6, r11, r0 ; d (o3 | o7)
uadd16 r7, r10, r6 ; a+d
mov r8, #4 ; set up 4's
orr r8, r8, #0x40000 ; 4|4
usub16 r6, r10, r6 ; a-d
uadd16 r6, r6, r8 ; a-d+4, 3|7
uadd16 r7, r7, r8 ; a+d+4, 0|4
uadd16 r10, r9, r12 ; b+c
usub16 r0, r9, r12 ; b-c
uadd16 r10, r10, r8 ; b+c+4, 1|5
uadd16 r8, r0, r8 ; b-c+4, 2|6
ldr lr, [sp, #40] ; dst stride
ldrb r0, [r1] ; pred p0
ldrb r11, [r1, #1] ; pred p1
ldrb r12, [r1, #2] ; pred p2
add r0, r0, r7, asr #19 ; p0 + o0
add r11, r11, r10, asr #19 ; p1 + o1
add r12, r12, r8, asr #19 ; p2 + o2
usat r0, #8, r0 ; d0 = clip8(p0 + o0)
usat r11, #8, r11 ; d1 = clip8(p1 + o1)
usat r12, #8, r12 ; d2 = clip8(p2 + o2)
add r0, r0, r11, lsl #8 ; |--|--|d1|d0|
ldrb r11, [r1, #3] ; pred p3
add r0, r0, r12, lsl #16 ; |--|d2|d1|d0|
add r11, r11, r6, asr #19 ; p3 + o3
sxth r7, r7 ;
sxth r10, r10 ;
usat r11, #8, r11 ; d3 = clip8(p3 + o3)
sxth r8, r8 ;
sxth r6, r6 ;
add r0, r0, r11, lsl #24 ; |d3|d2|d1|d0|
ldrb r12, [r1, r2]! ; pred p4
str r0, [r3], lr
ldrb r11, [r1, #1] ; pred p5
add r12, r12, r7, asr #3 ; p4 + o4
add r11, r11, r10, asr #3 ; p5 + o5
usat r12, #8, r12 ; d4 = clip8(p4 + o4)
usat r11, #8, r11 ; d5 = clip8(p5 + o5)
ldrb r7, [r1, #2] ; pred p6
ldrb r10, [r1, #3] ; pred p7
add r12, r12, r11, lsl #8 ; |--|--|d5|d4|
add r7, r7, r8, asr #3 ; p6 + o6
add r10, r10, r6, asr #3 ; p7 + o7
ldr r0, [sp] ; load input pointer
usat r7, #8, r7 ; d6 = clip8(p6 + o6)
usat r10, #8, r10 ; d7 = clip8(p7 + o7)
add r12, r12, r7, lsl #16 ; |--|d6|d5|d4|
add r12, r12, r10, lsl #24 ; |d7|d6|d5|d4|
str r12, [r3], lr
add r0, r0, #16
add r1, r1, r2 ; pred + pitch
bcs loop2_dual
add sp, sp, #4 ; idct_output buffer
ldmia sp!, {r4 - r11, pc}
ENDP
END
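The constant pool values are the Q16 fixed-point rotation factors of the VP8 4x4 IDCT: sinpi8sqrt2 = 0x8A8C (35468) and cospi8sqrt2minus1 = 0x4E7B (20091). A scalar sketch of the 1-D butterfly the dual loops compute, following the structure of the C reference (illustrative only; the assembly processes two columns per iteration and folds in the prediction add and rounding):

#define cospi8sqrt2minus1 20091  /* 0x4E7B, Q16 */
#define sinpi8sqrt2       35468  /* 0x8A8C, Q16 */

/* One 1-D column butterfly of the 4x4 LLM IDCT; ip[0], ip[4],
 * ip[8], ip[12] are one column of the 4x4 coefficient block. */
static void idct_col(const short *ip, int *op)
{
    int a1, b1, c1, d1, t1, t2;
    a1 = ip[0] + ip[8];
    b1 = ip[0] - ip[8];
    t1 = (ip[4] * sinpi8sqrt2) >> 16;
    t2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
    c1 = t1 - t2;
    t1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
    t2 = (ip[12] * sinpi8sqrt2) >> 16;
    d1 = t1 + t2;
    op[0] = a1 + d1;  /* o0  */
    op[1] = b1 + c1;  /* o4  */
    op[2] = b1 - c1;  /* o8  */
    op[3] = a1 - d1;  /* o12 */
}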

View File

@@ -1,136 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_inv_walsh4x4_v6|
ARM
REQUIRE8
PRESERVE8
AREA |.text|, CODE, READONLY ; name this block of code
;short vp8_short_inv_walsh4x4_v6(short *input, short *mb_dqcoeff)
|vp8_short_inv_walsh4x4_v6| PROC
stmdb sp!, {r4 - r12, lr}
ldr r2, [r0, #0] ; [1 | 0]
ldr r3, [r0, #4] ; [3 | 2]
ldr r4, [r0, #8] ; [5 | 4]
ldr r5, [r0, #12] ; [7 | 6]
ldr r6, [r0, #16] ; [9 | 8]
ldr r7, [r0, #20] ; [11 | 10]
ldr r8, [r0, #24] ; [13 | 12]
ldr r9, [r0, #28] ; [15 | 14]
qadd16 r10, r2, r8 ; a1 [1+13 | 0+12]
qadd16 r11, r4, r6 ; b1 [5+9 | 4+8]
qsub16 r12, r4, r6 ; c1 [5-9 | 4-8]
qsub16 lr, r2, r8 ; d1 [1-13 | 0-12]
qadd16 r2, r10, r11 ; a1 + b1 [1 | 0]
qadd16 r4, r12, lr ; c1 + d1 [5 | 4]
qsub16 r6, r10, r11 ; a1 - b1 [9 | 8]
qsub16 r8, lr, r12 ; d1 - c1 [13 | 12]
qadd16 r10, r3, r9 ; a1 [3+15 | 2+14]
qadd16 r11, r5, r7 ; b1 [7+11 | 6+10]
qsub16 r12, r5, r7 ; c1 [7-11 | 6-10]
qsub16 lr, r3, r9 ; d1 [3-15 | 2-14]
qadd16 r3, r10, r11 ; a1 + b1 [3 | 2]
qadd16 r5, r12, lr ; c1 + d1 [7 | 6]
qsub16 r7, r10, r11 ; a1 - b1 [11 | 10]
qsub16 r9, lr, r12 ; d1 - c1 [15 | 14]
; first transform complete
qsubaddx r10, r2, r3 ; [c1|a1] [1-2 | 0+3]
qaddsubx r11, r2, r3 ; [b1|d1] [1+2 | 0-3]
qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7]
qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7]
qaddsubx r2, r10, r11 ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r3, r11, r10 ; [a2|d2] [b1+a1 | d1-c1]
ldr r10, c0x00030003
qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1]
qadd16 r2, r2, r10 ; [b2+3|c2+3]
qadd16 r3, r3, r10 ; [a2+3|d2+3]
qadd16 r4, r4, r10 ; [b2+3|c2+3]
qadd16 r5, r5, r10 ; [a2+3|d2+3]
asr r12, r3, #19 ; [0]
strh r12, [r1], #32
asr lr, r2, #19 ; [1]
strh lr, [r1], #32
sxth r2, r2
sxth r3, r3
asr r2, r2, #3 ; [2]
strh r2, [r1], #32
asr r3, r3, #3 ; [3]
strh r3, [r1], #32
asr r12, r5, #19 ; [4]
strh r12, [r1], #32
asr lr, r4, #19 ; [5]
strh lr, [r1], #32
sxth r4, r4
sxth r5, r5
asr r4, r4, #3 ; [6]
strh r4, [r1], #32
asr r5, r5, #3 ; [7]
strh r5, [r1], #32
qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11]
qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11]
qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
qaddsubx r6, r2, r3 ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1]
qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1]
qadd16 r6, r6, r10 ; [b2+3|c2+3]
qadd16 r7, r7, r10 ; [a2+3|d2+3]
qadd16 r8, r8, r10 ; [b2+3|c2+3]
qadd16 r9, r9, r10 ; [a2+3|d2+3]
asr r12, r7, #19 ; [8]
strh r12, [r1], #32
asr lr, r6, #19 ; [9]
strh lr, [r1], #32
sxth r6, r6
sxth r7, r7
asr r6, r6, #3 ; [10]
strh r6, [r1], #32
asr r7, r7, #3 ; [11]
strh r7, [r1], #32
asr r12, r9, #19 ; [12]
strh r12, [r1], #32
asr lr, r8, #19 ; [13]
strh lr, [r1], #32
sxth r8, r8
sxth r9, r9
asr r8, r8, #3 ; [14]
strh r8, [r1], #32
asr r9, r9, #3 ; [15]
strh r9, [r1], #32
ldmia sp!, {r4 - r12, pc}
ENDP ; |vp8_short_inv_walsh4x4_v6|
; Constant Pool
c0x00030003 DCD 0x00030003
END
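The output stage above rounds each Walsh result and scatters it to the DC slot of one 4x4 block, which is why the strh stores advance by #32 bytes (16 shorts) per coefficient. A sketch of that final stage, assuming the 16 butterfly results are already computed:

/* mb_dqcoeff[i * 16] receives the rounded result for block i. */
static void store_walsh_output(const int out[16], short *mb_dqcoeff)
{
    int i;
    for (i = 0; i < 16; i++)
        mb_dqcoeff[i * 16] = (short)((out[i] + 3) >> 3);
}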

File diff suppressed because it is too large

View File

@@ -1,286 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6|
EXPORT |vp8_loop_filter_simple_vertical_edge_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
MACRO
TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
; a0: 03 02 01 00
; a1: 13 12 11 10
; a2: 23 22 21 20
; a3: 33 32 31 30
; b3 b2 b1 b0
uxtb16 $b1, $a1 ; xx 12 xx 10
uxtb16 $b0, $a0 ; xx 02 xx 00
uxtb16 $b3, $a3 ; xx 32 xx 30
uxtb16 $b2, $a2 ; xx 22 xx 20
orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00
orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20
uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11
uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31
uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01
uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21
orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01
orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21
pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- p1
pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- p3
pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- p0
pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- p2
MEND
src RN r0
pstep RN r1
;r0 unsigned char *src_ptr,
;r1 int src_pixel_step,
;r2 const char *blimit
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
ldrb r12, [r2] ; blimit
ldr r3, [src, -pstep, lsl #1] ; p1
ldr r4, [src, -pstep] ; p0
ldr r5, [src] ; q0
ldr r6, [src, pstep] ; q1
orr r12, r12, r12, lsl #8 ; blimit
ldr r2, c0x80808080
orr r12, r12, r12, lsl #16 ; blimit
mov r9, #4 ; double the count. we're doing 4 at a time
mov lr, #0 ; need 0 in a couple places
|simple_hnext8|
; vp8_simple_filter_mask()
uqsub8 r7, r3, r6 ; p1 - q1
uqsub8 r8, r6, r3 ; q1 - p1
uqsub8 r10, r4, r5 ; p0 - q0
uqsub8 r11, r5, r4 ; q0 - p0
orr r8, r8, r7 ; abs(p1 - q1)
orr r10, r10, r11 ; abs(p0 - q0)
uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2
uhadd8 r8, r8, lr ; abs(p1 - q1) >> 1
uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
mvn r8, #0
usub8 r10, r12, r10 ; compare to flimit. usub8 sets GE flags
sel r10, r8, lr ; filter mask: F or 0
cmp r10, #0
beq simple_hskip_filter ; skip filtering if all masks are 0x00
;vp8_simple_filter()
eor r3, r3, r2 ; p1 offset to convert to a signed value
eor r6, r6, r2 ; q1 offset to convert to a signed value
eor r4, r4, r2 ; p0 offset to convert to a signed value
eor r5, r5, r2 ; q0 offset to convert to a signed value
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
qsub8 r6, r5, r4 ; q0 - p0
qadd8 r3, r3, r6 ; += q0 - p0
ldr r7, c0x04040404
qadd8 r3, r3, r6 ; += q0 - p0
ldr r8, c0x03030303
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0)
;STALL
and r3, r3, r10 ; vp8_filter &= mask
qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4
qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3
shadd8 r7 , r7 , lr
shadd8 r8 , r8 , lr
shadd8 r7 , r7 , lr
shadd8 r8 , r8 , lr
shadd8 r7 , r7 , lr ; Filter1 >>= 3
shadd8 r8 , r8 , lr ; Filter2 >>= 3
qsub8 r5 ,r5, r7 ; u = q0 - Filter1
qadd8 r4, r4, r8 ; u = p0 + Filter2
eor r5, r5, r2 ; *oq0 = u^0x80
str r5, [src] ; store oq0 result
eor r4, r4, r2 ; *op0 = u^0x80
str r4, [src, -pstep] ; store op0 result
|simple_hskip_filter|
subs r9, r9, #1
addne src, src, #4 ; next row
ldrne r3, [src, -pstep, lsl #1] ; p1
ldrne r4, [src, -pstep] ; p0
ldrne r5, [src] ; q0
ldrne r6, [src, pstep] ; q1
bne simple_hnext8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_loop_filter_simple_horizontal_edge_armv6|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
ldrb r12, [r2] ; r12: blimit
ldr r2, c0x80808080
orr r12, r12, r12, lsl #8
; load source data to r7, r8, r9, r10
ldrh r3, [src, #-2]
pld [src, #23] ; preload for next block
ldrh r4, [src], pstep
orr r12, r12, r12, lsl #16
ldrh r5, [src, #-2]
pld [src, #23]
ldrh r6, [src], pstep
pkhbt r7, r3, r4, lsl #16
ldrh r3, [src, #-2]
pld [src, #23]
ldrh r4, [src], pstep
pkhbt r8, r5, r6, lsl #16
ldrh r5, [src, #-2]
pld [src, #23]
ldrh r6, [src], pstep
mov r11, #4 ; double the count. we're doing 4 at a time
|simple_vnext8|
; vp8_simple_filter_mask() function
pkhbt r9, r3, r4, lsl #16
pkhbt r10, r5, r6, lsl #16
;transpose r7, r8, r9, r10 to r3, r4, r5, r6
TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
uqsub8 r7, r3, r6 ; p1 - q1
uqsub8 r8, r6, r3 ; q1 - p1
uqsub8 r9, r4, r5 ; p0 - q0
uqsub8 r10, r5, r4 ; q0 - p0
orr r7, r7, r8 ; abs(p1 - q1)
orr r9, r9, r10 ; abs(p0 - q0)
mov r8, #0
uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
mvn r10, #0 ; r10 == -1
usub8 r7, r12, r7 ; compare to flimit
sel lr, r10, r8 ; filter mask
cmp lr, #0
beq simple_vskip_filter ; skip filtering
;vp8_simple_filter() function
eor r3, r3, r2 ; p1 offset to convert to a signed value
eor r6, r6, r2 ; q1 offset to convert to a signed value
eor r4, r4, r2 ; p0 offset to convert to a signed value
eor r5, r5, r2 ; q0 offset to convert to a signed value
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
qsub8 r6, r5, r4 ; q0 - p0
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
ldr r9, c0x03030303 ; r9 = 3
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
ldr r7, c0x04040404
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0)
;STALL
and r3, r3, lr ; vp8_filter &= mask
qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4
shadd8 r9 , r9 , r8
shadd8 r3 , r3 , r8
shadd8 r9 , r9 , r8
shadd8 r3 , r3 , r8
shadd8 r9 , r9 , r8 ; Filter2 >>= 3
shadd8 r3 , r3 , r8 ; Filter1 >>= 3
;calculate output
sub src, src, pstep, lsl #2
qadd8 r4, r4, r9 ; u = p0 + Filter2
qsub8 r5, r5, r3 ; u = q0 - Filter1
eor r4, r4, r2 ; *op0 = u^0x80
eor r5, r5, r2 ; *oq0 = u^0x80
strb r4, [src, #-1] ; store the result
mov r4, r4, lsr #8
strb r5, [src], pstep
mov r5, r5, lsr #8
strb r4, [src, #-1]
mov r4, r4, lsr #8
strb r5, [src], pstep
mov r5, r5, lsr #8
strb r4, [src, #-1]
mov r4, r4, lsr #8
strb r5, [src], pstep
mov r5, r5, lsr #8
strb r4, [src, #-1]
strb r5, [src], pstep
|simple_vskip_filter|
subs r11, r11, #1
; load source data to r7, r8, r9, r10
ldrneh r3, [src, #-2]
pld [src, #23] ; preload for next block
ldrneh r4, [src], pstep
ldrneh r5, [src, #-2]
pld [src, #23]
ldrneh r6, [src], pstep
pkhbt r7, r3, r4, lsl #16
ldrneh r3, [src, #-2]
pld [src, #23]
ldrneh r4, [src], pstep
pkhbt r8, r5, r6, lsl #16
ldrneh r5, [src, #-2]
pld [src, #23]
ldrneh r6, [src], pstep
bne simple_vnext8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6|
; Constant Pool
c0x80808080 DCD 0x80808080
c0x03030303 DCD 0x03030303
c0x04040404 DCD 0x04040404
END
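For reference, the per-pixel arithmetic implemented by both simple-edge routines above maps onto the following scalar sketch. It is illustrative only: the helper names are assumptions, qadd8/qsub8 are modelled by a saturating signed-8-bit clamp, and the three shadd8-by-zero steps by an arithmetic shift right by 3.

#include <stdlib.h>

static int sclamp8(int v)                     /* qadd8/qsub8 saturation */
{
    return v < -128 ? -128 : (v > 127 ? 127 : v);
}

static void simple_filter_ref(unsigned char *op1, unsigned char *op0,
                              unsigned char *oq0, unsigned char *oq1,
                              int blimit)
{
    int p1 = (signed char)(*op1 ^ 0x80);      /* eor with 0x80: to signed */
    int p0 = (signed char)(*op0 ^ 0x80);
    int q0 = (signed char)(*oq0 ^ 0x80);
    int q1 = (signed char)(*oq1 ^ 0x80);
    int f, F1, F2;

    /* mask: filter only if abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit */
    if (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit)
        return;

    f = sclamp8(p1 - q1);                     /* vp8_filter = p1 - q1 */
    f = sclamp8(f + (q0 - p0));               /* three saturating adds */
    f = sclamp8(f + (q0 - p0));
    f = sclamp8(f + (q0 - p0));               /* = p1 - q1 + 3 * (q0 - p0) */

    F1 = sclamp8(f + 4) >> 3;                 /* Filter1 */
    F2 = sclamp8(f + 3) >> 3;                 /* Filter2 */

    *oq0 = (unsigned char)(sclamp8(q0 - F1) ^ 0x80);   /* u = q0 - Filter1 */
    *op0 = (unsigned char)(sclamp8(p0 + F2) ^ 0x80);   /* u = p0 + Filter2 */
}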


@ -1,273 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_sixtap_predict8x4_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; stack unsigned char *dst_ptr,
; stack int dst_pitch
;-------------------------------------
;note: In the first pass, the result is stored transposed (8 lines x 9 columns) on the stack. Temporary stack size is 184 bytes.
;Line width is 20 bytes: 9 halfwords of data plus 2 bytes of padding to keep it 4-byte aligned. In the second pass, data is
;loaded from the stack and the result is stored transposed back.
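;reference for the sizes above: each transposed line occupies 20 bytes
;(9 halfwords = 18 bytes plus 2 bytes of padding for 4-byte alignment), so
;the 8 transposed lines need 8 * 20 = 160 bytes; 184 reserves that plus the
;saved yoffset word and slack (see the "180 - (160 + 4)" adjustment below).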
|vp8_sixtap_predict8x4_armv6| PROC
stmdb sp!, {r4 - r11, lr}
str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset
cmp r2, #0 ;skip first_pass filter if xoffset=0
add lr, sp, #4 ;point to temporary buffer
beq skip_firstpass_filter
;first-pass filter
adr r12, filter8_coeff
sub r0, r0, r1, lsl #1
add r3, r1, #10 ; preload next row
pld [r0, r3]
add r2, r12, r2, lsl #4 ;calculate filter location
add r0, r0, #3 ;adjust src only for loading convenience
ldr r3, [r2] ; load up packed filter coefficients
ldr r4, [r2, #4]
ldr r5, [r2, #8]
mov r2, #0x90000 ; height=9 is top part of counter
sub r1, r1, #8
|first_pass_hloop_v6|
ldrb r6, [r0, #-5] ; load source data
ldrb r7, [r0, #-4]
ldrb r8, [r0, #-3]
ldrb r9, [r0, #-2]
ldrb r10, [r0, #-1]
orr r2, r2, #0x4 ; construct loop counter. width=8=4x2
pkhbt r6, r6, r7, lsl #16 ; r7 | r6
pkhbt r7, r7, r8, lsl #16 ; r8 | r7
pkhbt r8, r8, r9, lsl #16 ; r9 | r8
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|first_pass_wloop_v6|
smuad r11, r6, r3 ; vp8_filter[0], vp8_filter[1]
smuad r12, r7, r3
ldrb r6, [r0], #1
smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3]
ldrb r7, [r0], #1
smlad r12, r9, r4, r12
pkhbt r10, r10, r6, lsl #16 ; r6 | r10
pkhbt r6, r6, r7, lsl #16 ; r7 | r6
smlad r11, r10, r5, r11 ; vp8_filter[4], vp8_filter[5]
smlad r12, r6, r5, r12
sub r2, r2, #1
add r11, r11, #0x40 ; round_shift_and_clamp
tst r2, #0xff ; test loop counter
usat r11, #8, r11, asr #7
add r12, r12, #0x40
strh r11, [lr], #20 ; result is transposed and stored
usat r12, #8, r12, asr #7
strh r12, [lr], #20
movne r11, r6
movne r12, r7
movne r6, r8
movne r7, r9
movne r8, r10
movne r9, r11
movne r10, r12
bne first_pass_wloop_v6
;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines
;;IF ARCHITECTURE=6
;pld [src, ppl]
;;pld [src, r9]
;;ENDIF
subs r2, r2, #0x10000
sub lr, lr, #158
add r0, r0, r1 ; move to next input line
add r11, r1, #18 ; preload next row; add back block width (=8), which was subtracted earlier
pld [r0, r11]
bne first_pass_hloop_v6
;second pass filter
secondpass_filter
ldr r3, [sp], #4 ; load back yoffset
ldr r0, [sp, #216] ; load dst address from stack 180+36
ldr r1, [sp, #220] ; load dst stride from stack 180+40
cmp r3, #0
beq skip_secondpass_filter
adr r12, filter8_coeff
add lr, r12, r3, lsl #4 ;calculate filter location
mov r2, #0x00080000
ldr r3, [lr] ; load up packed filter coefficients
ldr r4, [lr, #4]
ldr r5, [lr, #8]
pkhbt r12, r4, r3 ; pack the filter differently
pkhbt r11, r5, r4
second_pass_hloop_v6
ldr r6, [sp] ; load the data
ldr r7, [sp, #4]
orr r2, r2, #2 ; loop counter
second_pass_wloop_v6
smuad lr, r3, r6 ; apply filter
smulbt r10, r3, r6
ldr r8, [sp, #8]
smlad lr, r4, r7, lr
smladx r10, r12, r7, r10
ldrh r9, [sp, #12]
smlad lr, r5, r8, lr
smladx r10, r11, r8, r10
add sp, sp, #4
smlatb r10, r5, r9, r10
sub r2, r2, #1
add lr, lr, #0x40 ; round_shift_and_clamp
tst r2, #0xff
usat lr, #8, lr, asr #7
add r10, r10, #0x40
strb lr, [r0], r1 ; the result is transposed back and stored
usat r10, #8, r10, asr #7
strb r10, [r0],r1
movne r6, r7
movne r7, r8
bne second_pass_wloop_v6
subs r2, r2, #0x10000
add sp, sp, #12 ; update src for next loop (20-8)
sub r0, r0, r1, lsl #2
add r0, r0, #1
bne second_pass_hloop_v6
add sp, sp, #20
ldmia sp!, {r4 - r11, pc}
;--------------------
skip_firstpass_filter
sub r0, r0, r1, lsl #1
sub r1, r1, #8
mov r2, #9
skip_firstpass_hloop
ldrb r4, [r0], #1 ; load data
subs r2, r2, #1
ldrb r5, [r0], #1
strh r4, [lr], #20 ; store it to immediate buffer
ldrb r6, [r0], #1 ; load data
strh r5, [lr], #20
ldrb r7, [r0], #1
strh r6, [lr], #20
ldrb r8, [r0], #1
strh r7, [lr], #20
ldrb r9, [r0], #1
strh r8, [lr], #20
ldrb r10, [r0], #1
strh r9, [lr], #20
ldrb r11, [r0], #1
strh r10, [lr], #20
add r0, r0, r1 ; move to next input line
strh r11, [lr], #20
sub lr, lr, #158 ; move over to next column
bne skip_firstpass_hloop
b secondpass_filter
;--------------------
skip_secondpass_filter
mov r2, #8
add sp, sp, #4 ;start from src[0] instead of src[-2]
skip_secondpass_hloop
ldr r6, [sp], #4
subs r2, r2, #1
ldr r8, [sp], #4
mov r7, r6, lsr #16 ; unpack
strb r6, [r0], r1
mov r9, r8, lsr #16
strb r7, [r0], r1
add sp, sp, #12 ; 20-8
strb r8, [r0], r1
strb r9, [r0], r1
sub r0, r0, r1, lsl #2
add r0, r0, #1
bne skip_secondpass_hloop
add sp, sp, #16 ; 180 - (160 +4)
ldmia sp!, {r4 - r11, pc}
ENDP
;-----------------
;One word each is reserved. Label filter8_coeff can be used to access the data.
;Data address: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
filter8_coeff
DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000
DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000
DCD 0xfff50002, 0x0024006c, 0x0001fff8, 0x00000000
DCD 0xfff70000, 0x0032005d, 0x0000fffa, 0x00000000
DCD 0xfff00003, 0x004d004d, 0x0003fff0, 0x00000000
DCD 0xfffa0000, 0x005d0032, 0x0000fff7, 0x00000000
DCD 0xfff80001, 0x006c0024, 0x0002fff5, 0x00000000
DCD 0xffff0000, 0x007b000c, 0x0000fffa, 0x00000000
;DCD 0, 0, 128, 0, 0, 0
;DCD 0, -6, 123, 12, -1, 0
;DCD 2, -11, 108, 36, -8, 1
;DCD 0, -9, 93, 50, -6, 0
;DCD 3, -16, 77, 77, -16, 3
;DCD 0, -6, 50, 93, -9, 0
;DCD 1, -8, 36, 108, -11, 2
;DCD 0, -1, 12, 123, -6, 0
END
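The round_shift_and_clamp step used after every multiply-accumulate above ("add #0x40" followed by "usat #8, ..., asr #7") corresponds to this scalar helper (name illustrative, not part of the library):

static unsigned char round_shift_and_clamp(int v)
{
    v = (v + 64) >> 7;              /* round to nearest in Q7 */
    if (v < 0)   v = 0;             /* usat #8 saturates to [0, 255] */
    if (v > 255) v = 255;
    return (unsigned char)v;
}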


@ -1,113 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include <math.h>
#include "vp8/common/filter.h"
#include "bilinearfilter_arm.h"
void vp8_filter_block2d_bil_armv6
(
unsigned char *src_ptr,
unsigned char *dst_ptr,
unsigned int src_pitch,
unsigned int dst_pitch,
const short *HFilter,
const short *VFilter,
int Width,
int Height
)
{
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
/* then 1-D vertically... */
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
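/* A minimal scalar sketch of one pass, for reference (helper name
 * bil_pass_ref is illustrative, not part of the library). Each
 * vp8_bilinear_filters pair sums to 128 (1.0 in Q7), so the rounded shift
 * by 7 returns results to the 8-bit range; the vertical pass is analogous
 * with a pixel step of Width instead of 1 and Height rows instead of
 * Height + 1. */
static void bil_pass_ref(const unsigned char *src, unsigned short *dst,
                         unsigned int src_pitch, unsigned int height,
                         unsigned int width, const short *f)
{
    unsigned int i, j;

    for (i = 0; i < height; i++)
    {
        for (j = 0; j < width; j++)
            dst[j] = (unsigned short)((src[j] * f[0] +
                                       src[j + 1] * f[1] + 64) >> 7);

        src += src_pitch;
        dst += width;
    }
}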
void vp8_bilinear_predict4x4_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
}
void vp8_bilinear_predict8x8_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
}
void vp8_bilinear_predict8x4_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
}
void vp8_bilinear_predict16x16_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
}


@ -1,43 +0,0 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
#define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
#ifdef __cplusplus
extern "C" {
#endif
extern void vp8_filter_block2d_bil_first_pass_armv6
(
const unsigned char *src_ptr,
unsigned short *dst_ptr,
unsigned int src_pitch,
unsigned int height,
unsigned int width,
const short *vp8_filter
);
extern void vp8_filter_block2d_bil_second_pass_armv6
(
const unsigned short *src_ptr,
unsigned char *dst_ptr,
int dst_pitch,
unsigned int height,
unsigned int width,
const short *vp8_filter
);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_ARM_BILINEARFILTER_ARM_H_


@ -1,25 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8/common/blockd.h"
#if HAVE_MEDIA
extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
{
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
}
#endif
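/* Scalar equivalent of the loop above (illustrative): each of the 16
 * quantized coefficients is multiplied by its dequantization factor,
 *     for (i = 0; i < 16; i++) DQ[i] = Q[i] * DQC[i];
 */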


@ -1,221 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include <math.h>
#include "vp8/common/filter.h"
#include "vpx_ports/mem.h"
extern void vp8_filter_block2d_first_pass_armv6
(
unsigned char *src_ptr,
short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_width,
unsigned int output_height,
const short *vp8_filter
);
// 8x8
extern void vp8_filter_block2d_first_pass_8x8_armv6
(
unsigned char *src_ptr,
short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_width,
unsigned int output_height,
const short *vp8_filter
);
// 16x16
extern void vp8_filter_block2d_first_pass_16x16_armv6
(
unsigned char *src_ptr,
short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_width,
unsigned int output_height,
const short *vp8_filter
);
extern void vp8_filter_block2d_second_pass_armv6
(
short *src_ptr,
unsigned char *output_ptr,
unsigned int output_pitch,
unsigned int cnt,
const short *vp8_filter
);
extern void vp8_filter4_block2d_second_pass_armv6
(
short *src_ptr,
unsigned char *output_ptr,
unsigned int output_pitch,
unsigned int cnt,
const short *vp8_filter
);
extern void vp8_filter_block2d_first_pass_only_armv6
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned int cnt,
unsigned int output_pitch,
const short *vp8_filter
);
extern void vp8_filter_block2d_second_pass_only_armv6
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned int cnt,
unsigned int output_pitch,
const short *vp8_filter
);
#if HAVE_MEDIA
void vp8_sixtap_predict4x4_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* Vfilter is null. First pass only */
if (xoffset && !yoffset)
{
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
}
/* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
}
else
{
/* Vfilter is a 4 tap filter */
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
}
/* Vfilter is 6 tap filter */
else
{
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
}
}
}
void vp8_sixtap_predict8x8_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
}
/* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
}
else
{
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
}
else
{
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
}
}
}
void vp8_sixtap_predict16x16_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
}
/* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
}
else
{
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
}
else
{
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
}
}
}
#endif
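/* Note on the first-pass heights used above: the 6-tap vertical pass needs
 * two rows above and three below the block, so the first pass filters
 * H + 5 rows (9, 13 and 21); the 4-tap path taken for odd yoffsets (whose
 * outer taps are zero) needs only one row above and two below, hence
 * H + 3 rows (7, 11 and 19) with src and FData each shifted by one line. */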


@ -1,181 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/onyxc_int.h"
#define prototype_loopfilter(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh, int count)
#if HAVE_MEDIA
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
#endif
#if HAVE_NEON
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
unsigned char blimit, unsigned char limit, unsigned char thresh);
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
unsigned char blimit, unsigned char limit, unsigned char thresh,
unsigned char *v);
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
#endif
#if HAVE_MEDIA
/* ARMV6/MEDIA loopfilter functions*/
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
}
#endif
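/* In the dispatchers above, the "B" (block-internal) filters run on the
 * three inner 4-pixel edges of the 16x16 macroblock (offsets 4, 8 and 12),
 * while the "MB" filters run on the macroblock boundary itself; a count of
 * 2 spans the 16-wide luma rows/columns, a count of 1 the 8-wide chroma. */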
#if HAVE_NEON
/* NEON loopfilter functions */
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
}
#endif


@ -1,699 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
static const uint8_t bifilter4_coeff[8][2] = {
{128, 0},
{112, 16},
{ 96, 32},
{ 80, 48},
{ 64, 64},
{ 48, 80},
{ 32, 96},
{ 16, 112}
};
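// Each coefficient pair above sums to 128 (1.0 in Q7), so the
// vqrshrn_n_u16(..., 7) narrowing steps below return results to 8-bit range.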
void vp8_bilinear_predict4x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch) {
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8;
uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8;
uint8x16_t q1u8, q2u8;
uint16x8_t q1u16, q2u16;
uint16x8_t q7u16, q8u16, q9u16;
uint64x2_t q4u64, q5u64;
uint64x1_t d12u64;
uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2;
if (xoffset == 0) { // skip_1stpass_filter
uint32x2_t d28u32 = vdup_n_u32(0);
uint32x2_t d29u32 = vdup_n_u32(0);
uint32x2_t d30u32 = vdup_n_u32(0);
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0);
src_ptr += src_pixels_per_line;
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1);
src_ptr += src_pixels_per_line;
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0);
src_ptr += src_pixels_per_line;
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1);
src_ptr += src_pixels_per_line;
d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0);
d28u8 = vreinterpret_u8_u32(d28u32);
d29u8 = vreinterpret_u8_u32(d29u32);
d30u8 = vreinterpret_u8_u32(d30u32);
} else {
d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d6u8 = vld1_u8(src_ptr);
q1u8 = vcombine_u8(d2u8, d3u8);
q2u8 = vcombine_u8(d4u8, d5u8);
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
vreinterpret_u32_u8(vget_high_u8(q1u8)));
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
vreinterpret_u32_u8(vget_high_u8(q2u8)));
d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
vreinterpret_u32_u64(vget_high_u64(q4u64)));
d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),
vreinterpret_u32_u64(vget_high_u64(q5u64)));
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
q9u16 = vmull_u8(d6u8, d0u8);
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8);
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8);
q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8);
d28u8 = vqrshrn_n_u16(q7u16, 7);
d29u8 = vqrshrn_n_u16(q8u16, 7);
d30u8 = vqrshrn_n_u16(q9u16, 7);
}
// secondpass_filter
if (yoffset == 0) { // skip_2ndpass_filter
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1);
} else {
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
q1u16 = vmull_u8(d28u8, d0u8);
q2u16 = vmull_u8(d29u8, d0u8);
d26u8 = vext_u8(d28u8, d29u8, 4);
d27u8 = vext_u8(d29u8, d30u8, 4);
q1u16 = vmlal_u8(q1u16, d26u8, d1u8);
q2u16 = vmlal_u8(q2u16, d27u8, d1u8);
d2u8 = vqrshrn_n_u16(q1u16, 7);
d3u8 = vqrshrn_n_u16(q2u16, 7);
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
}
return;
}
void vp8_bilinear_predict8x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch) {
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8;
uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8;
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
uint16x8_t q1u16, q2u16, q3u16, q4u16;
uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
if (xoffset == 0) { // skip_1stpass_filter
d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d26u8 = vld1_u8(src_ptr);
} else {
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q5u8 = vld1q_u8(src_ptr);
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
d22u8 = vqrshrn_n_u16(q6u16, 7);
d23u8 = vqrshrn_n_u16(q7u16, 7);
d24u8 = vqrshrn_n_u16(q8u16, 7);
d25u8 = vqrshrn_n_u16(q9u16, 7);
d26u8 = vqrshrn_n_u16(q10u16, 7);
}
// secondpass_filter
if (yoffset == 0) { // skip_2ndpass_filter
vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d25u8);
} else {
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
q1u16 = vmull_u8(d22u8, d0u8);
q2u16 = vmull_u8(d23u8, d0u8);
q3u16 = vmull_u8(d24u8, d0u8);
q4u16 = vmull_u8(d25u8, d0u8);
q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
d2u8 = vqrshrn_n_u16(q1u16, 7);
d3u8 = vqrshrn_n_u16(q2u16, 7);
d4u8 = vqrshrn_n_u16(q3u16, 7);
d5u8 = vqrshrn_n_u16(q4u16, 7);
vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d5u8);
}
return;
}
void vp8_bilinear_predict8x8_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch) {
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8;
uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8;
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16;
uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
if (xoffset == 0) { // skip_1stpass_filter
d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d30u8 = vld1_u8(src_ptr);
} else {
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
d22u8 = vqrshrn_n_u16(q6u16, 7);
d23u8 = vqrshrn_n_u16(q7u16, 7);
d24u8 = vqrshrn_n_u16(q8u16, 7);
d25u8 = vqrshrn_n_u16(q9u16, 7);
// first-pass filtering on the remaining 5 lines
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q5u8 = vld1q_u8(src_ptr);
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
d26u8 = vqrshrn_n_u16(q6u16, 7);
d27u8 = vqrshrn_n_u16(q7u16, 7);
d28u8 = vqrshrn_n_u16(q8u16, 7);
d29u8 = vqrshrn_n_u16(q9u16, 7);
d30u8 = vqrshrn_n_u16(q10u16, 7);
}
// secondpass_filter
if (yoffset == 0) { // skip_2ndpass_filter
vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d29u8);
} else {
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
q1u16 = vmull_u8(d22u8, d0u8);
q2u16 = vmull_u8(d23u8, d0u8);
q3u16 = vmull_u8(d24u8, d0u8);
q4u16 = vmull_u8(d25u8, d0u8);
q5u16 = vmull_u8(d26u8, d0u8);
q6u16 = vmull_u8(d27u8, d0u8);
q7u16 = vmull_u8(d28u8, d0u8);
q8u16 = vmull_u8(d29u8, d0u8);
q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
q5u16 = vmlal_u8(q5u16, d27u8, d1u8);
q6u16 = vmlal_u8(q6u16, d28u8, d1u8);
q7u16 = vmlal_u8(q7u16, d29u8, d1u8);
q8u16 = vmlal_u8(q8u16, d30u8, d1u8);
d2u8 = vqrshrn_n_u16(q1u16, 7);
d3u8 = vqrshrn_n_u16(q2u16, 7);
d4u8 = vqrshrn_n_u16(q3u16, 7);
d5u8 = vqrshrn_n_u16(q4u16, 7);
d6u8 = vqrshrn_n_u16(q5u16, 7);
d7u8 = vqrshrn_n_u16(q6u16, 7);
d8u8 = vqrshrn_n_u16(q7u16, 7);
d9u8 = vqrshrn_n_u16(q8u16, 7);
vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch;
vst1_u8((uint8_t *)dst_ptr, d9u8);
}
return;
}
void vp8_bilinear_predict16x16_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch) {
int i;
unsigned char tmp[272];
unsigned char *tmpp;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
uint8x8_t d19u8, d20u8, d21u8;
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8;
uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16;
uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16;
if (xoffset == 0) { // secondpass_bfilter16x16_only
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
q11u8 = vld1q_u8(src_ptr);
src_ptr += src_pixels_per_line;
for (i = 4; i > 0; i--) {
q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
d2u8 = vqrshrn_n_u16(q1u16, 7);
d3u8 = vqrshrn_n_u16(q2u16, 7);
d4u8 = vqrshrn_n_u16(q3u16, 7);
d5u8 = vqrshrn_n_u16(q4u16, 7);
d6u8 = vqrshrn_n_u16(q5u16, 7);
d7u8 = vqrshrn_n_u16(q6u16, 7);
d8u8 = vqrshrn_n_u16(q7u16, 7);
d9u8 = vqrshrn_n_u16(q8u16, 7);
q1u8 = vcombine_u8(d2u8, d3u8);
q2u8 = vcombine_u8(d4u8, d5u8);
q3u8 = vcombine_u8(d6u8, d7u8);
q4u8 = vcombine_u8(d8u8, d9u8);
q11u8 = q15u8;
vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
}
return;
}
if (yoffset == 0) { // firstpass_bfilter16x16_only
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
for (i = 4; i > 0 ; i--) {
d2u8 = vld1_u8(src_ptr);
d3u8 = vld1_u8(src_ptr + 8);
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d5u8 = vld1_u8(src_ptr);
d6u8 = vld1_u8(src_ptr + 8);
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d8u8 = vld1_u8(src_ptr);
d9u8 = vld1_u8(src_ptr + 8);
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d11u8 = vld1_u8(src_ptr);
d12u8 = vld1_u8(src_ptr + 8);
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
q7u16 = vmull_u8(d2u8, d0u8);
q8u16 = vmull_u8(d3u8, d0u8);
q9u16 = vmull_u8(d5u8, d0u8);
q10u16 = vmull_u8(d6u8, d0u8);
q11u16 = vmull_u8(d8u8, d0u8);
q12u16 = vmull_u8(d9u8, d0u8);
q13u16 = vmull_u8(d11u8, d0u8);
q14u16 = vmull_u8(d12u8, d0u8);
d2u8 = vext_u8(d2u8, d3u8, 1);
d5u8 = vext_u8(d5u8, d6u8, 1);
d8u8 = vext_u8(d8u8, d9u8, 1);
d11u8 = vext_u8(d11u8, d12u8, 1);
q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
d3u8 = vext_u8(d3u8, d4u8, 1);
d6u8 = vext_u8(d6u8, d7u8, 1);
d9u8 = vext_u8(d9u8, d10u8, 1);
d12u8 = vext_u8(d12u8, d13u8, 1);
q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
d14u8 = vqrshrn_n_u16(q7u16, 7);
d15u8 = vqrshrn_n_u16(q8u16, 7);
d16u8 = vqrshrn_n_u16(q9u16, 7);
d17u8 = vqrshrn_n_u16(q10u16, 7);
d18u8 = vqrshrn_n_u16(q11u16, 7);
d19u8 = vqrshrn_n_u16(q12u16, 7);
d20u8 = vqrshrn_n_u16(q13u16, 7);
d21u8 = vqrshrn_n_u16(q14u16, 7);
q7u8 = vcombine_u8(d14u8, d15u8);
q8u8 = vcombine_u8(d16u8, d17u8);
q9u8 = vcombine_u8(d18u8, d19u8);
q10u8 = vcombine_u8(d20u8, d21u8);
vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch;
}
return;
}
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
d2u8 = vld1_u8(src_ptr);
d3u8 = vld1_u8(src_ptr + 8);
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d5u8 = vld1_u8(src_ptr);
d6u8 = vld1_u8(src_ptr + 8);
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d8u8 = vld1_u8(src_ptr);
d9u8 = vld1_u8(src_ptr + 8);
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d11u8 = vld1_u8(src_ptr);
d12u8 = vld1_u8(src_ptr + 8);
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
// First Pass: output_height lines x output_width columns (17x16)
tmpp = tmp;
for (i = 3; i > 0; i--) {
q7u16 = vmull_u8(d2u8, d0u8);
q8u16 = vmull_u8(d3u8, d0u8);
q9u16 = vmull_u8(d5u8, d0u8);
q10u16 = vmull_u8(d6u8, d0u8);
q11u16 = vmull_u8(d8u8, d0u8);
q12u16 = vmull_u8(d9u8, d0u8);
q13u16 = vmull_u8(d11u8, d0u8);
q14u16 = vmull_u8(d12u8, d0u8);
d2u8 = vext_u8(d2u8, d3u8, 1);
d5u8 = vext_u8(d5u8, d6u8, 1);
d8u8 = vext_u8(d8u8, d9u8, 1);
d11u8 = vext_u8(d11u8, d12u8, 1);
q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
d3u8 = vext_u8(d3u8, d4u8, 1);
d6u8 = vext_u8(d6u8, d7u8, 1);
d9u8 = vext_u8(d9u8, d10u8, 1);
d12u8 = vext_u8(d12u8, d13u8, 1);
q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
d14u8 = vqrshrn_n_u16(q7u16, 7);
d15u8 = vqrshrn_n_u16(q8u16, 7);
d16u8 = vqrshrn_n_u16(q9u16, 7);
d17u8 = vqrshrn_n_u16(q10u16, 7);
d18u8 = vqrshrn_n_u16(q11u16, 7);
d19u8 = vqrshrn_n_u16(q12u16, 7);
d20u8 = vqrshrn_n_u16(q13u16, 7);
d21u8 = vqrshrn_n_u16(q14u16, 7);
d2u8 = vld1_u8(src_ptr);
d3u8 = vld1_u8(src_ptr + 8);
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d5u8 = vld1_u8(src_ptr);
d6u8 = vld1_u8(src_ptr + 8);
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d8u8 = vld1_u8(src_ptr);
d9u8 = vld1_u8(src_ptr + 8);
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
d11u8 = vld1_u8(src_ptr);
d12u8 = vld1_u8(src_ptr + 8);
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
q7u8 = vcombine_u8(d14u8, d15u8);
q8u8 = vcombine_u8(d16u8, d17u8);
q9u8 = vcombine_u8(d18u8, d19u8);
q10u8 = vcombine_u8(d20u8, d21u8);
vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16;
vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16;
}
// First-pass filtering for the remaining 5 lines
d14u8 = vld1_u8(src_ptr);
d15u8 = vld1_u8(src_ptr + 8);
d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
q9u16 = vmull_u8(d2u8, d0u8);
q10u16 = vmull_u8(d3u8, d0u8);
q11u16 = vmull_u8(d5u8, d0u8);
q12u16 = vmull_u8(d6u8, d0u8);
q13u16 = vmull_u8(d8u8, d0u8);
q14u16 = vmull_u8(d9u8, d0u8);
d2u8 = vext_u8(d2u8, d3u8, 1);
d5u8 = vext_u8(d5u8, d6u8, 1);
d8u8 = vext_u8(d8u8, d9u8, 1);
q9u16 = vmlal_u8(q9u16, d2u8, d1u8);
q11u16 = vmlal_u8(q11u16, d5u8, d1u8);
q13u16 = vmlal_u8(q13u16, d8u8, d1u8);
d3u8 = vext_u8(d3u8, d4u8, 1);
d6u8 = vext_u8(d6u8, d7u8, 1);
d9u8 = vext_u8(d9u8, d10u8, 1);
q10u16 = vmlal_u8(q10u16, d3u8, d1u8);
q12u16 = vmlal_u8(q12u16, d6u8, d1u8);
q14u16 = vmlal_u8(q14u16, d9u8, d1u8);
q1u16 = vmull_u8(d11u8, d0u8);
q2u16 = vmull_u8(d12u8, d0u8);
q3u16 = vmull_u8(d14u8, d0u8);
q4u16 = vmull_u8(d15u8, d0u8);
d11u8 = vext_u8(d11u8, d12u8, 1);
d14u8 = vext_u8(d14u8, d15u8, 1);
q1u16 = vmlal_u8(q1u16, d11u8, d1u8);
q3u16 = vmlal_u8(q3u16, d14u8, d1u8);
d12u8 = vext_u8(d12u8, d13u8, 1);
d15u8 = vext_u8(d15u8, d16u8, 1);
q2u16 = vmlal_u8(q2u16, d12u8, d1u8);
q4u16 = vmlal_u8(q4u16, d15u8, d1u8);
d10u8 = vqrshrn_n_u16(q9u16, 7);
d11u8 = vqrshrn_n_u16(q10u16, 7);
d12u8 = vqrshrn_n_u16(q11u16, 7);
d13u8 = vqrshrn_n_u16(q12u16, 7);
d14u8 = vqrshrn_n_u16(q13u16, 7);
d15u8 = vqrshrn_n_u16(q14u16, 7);
d16u8 = vqrshrn_n_u16(q1u16, 7);
d17u8 = vqrshrn_n_u16(q2u16, 7);
d18u8 = vqrshrn_n_u16(q3u16, 7);
d19u8 = vqrshrn_n_u16(q4u16, 7);
q5u8 = vcombine_u8(d10u8, d11u8);
q6u8 = vcombine_u8(d12u8, d13u8);
q7u8 = vcombine_u8(d14u8, d15u8);
q8u8 = vcombine_u8(d16u8, d17u8);
q9u8 = vcombine_u8(d18u8, d19u8);
vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16;
vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16;
vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
vst1q_u8((uint8_t *)tmpp, q9u8);
// secondpass_filter
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
tmpp = tmp;
q11u8 = vld1q_u8(tmpp);
tmpp += 16;
for (i = 4; i > 0; i--) {
q12u8 = vld1q_u8(tmpp); tmpp += 16;
q13u8 = vld1q_u8(tmpp); tmpp += 16;
q14u8 = vld1q_u8(tmpp); tmpp += 16;
q15u8 = vld1q_u8(tmpp); tmpp += 16;
q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
d2u8 = vqrshrn_n_u16(q1u16, 7);
d3u8 = vqrshrn_n_u16(q2u16, 7);
d4u8 = vqrshrn_n_u16(q3u16, 7);
d5u8 = vqrshrn_n_u16(q4u16, 7);
d6u8 = vqrshrn_n_u16(q5u16, 7);
d7u8 = vqrshrn_n_u16(q6u16, 7);
d8u8 = vqrshrn_n_u16(q7u16, 7);
d9u8 = vqrshrn_n_u16(q8u16, 7);
q1u8 = vcombine_u8(d2u8, d3u8);
q2u8 = vcombine_u8(d4u8, d5u8);
q3u8 = vcombine_u8(d6u8, d7u8);
q4u8 = vcombine_u8(d8u8, d9u8);
q11u8 = q15u8;
vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
}
return;
}


@ -1,59 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
void vp8_copy_mem8x4_neon(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride) {
uint8x8_t vtmp;
int r;
for (r = 0; r < 4; r++) {
vtmp = vld1_u8(src);
vst1_u8(dst, vtmp);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem8x8_neon(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride) {
uint8x8_t vtmp;
int r;
for (r = 0; r < 8; r++) {
vtmp = vld1_u8(src);
vst1_u8(dst, vtmp);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem16x16_neon(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride) {
int r;
uint8x16_t qtmp;
for (r = 0; r < 16; r++) {
qtmp = vld1q_u8(src);
vst1q_u8(dst, qtmp);
src += src_stride;
dst += dst_stride;
}
}


@ -1,42 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
void vp8_dc_only_idct_add_neon(
int16_t input_dc,
unsigned char *pred_ptr,
int pred_stride,
unsigned char *dst_ptr,
int dst_stride) {
int i;
uint16_t a1 = ((input_dc + 4) >> 3);
uint32x2_t d2u32 = vdup_n_u32(0);
uint8x8_t d2u8;
uint16x8_t q1u16;
uint16x8_t qAdd;
qAdd = vdupq_n_u16(a1);
for (i = 0; i < 2; i++) {
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
pred_ptr += pred_stride;
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
pred_ptr += pred_stride;
q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
dst_ptr += dst_stride;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
dst_ptr += dst_stride;
}
}
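// When only the DC coefficient is nonzero, the 4x4 inverse transform output
// is uniform, so the single rounded value a1 = (input_dc + 4) >> 3 is added
// to all 16 predictor pixels.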


@ -1,142 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 35468;
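// Q16 fixed-point IDCT multipliers:
//   20091 = round((sqrt(2) * cos(pi / 8) - 1) * 65536)
//   35468 = round( sqrt(2) * sin(pi / 8)      * 65536)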
void vp8_dequant_idct_add_neon(
int16_t *input,
int16_t *dq,
unsigned char *dst,
int stride) {
unsigned char *dst0;
int32x2_t d14, d15;
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
int16x8_t q1, q2, q3, q4, q5, q6;
int16x8_t qEmpty = vdupq_n_s16(0);
int32x2x2_t d2tmp0, d2tmp1;
int16x4x2_t d2tmp2, d2tmp3;
d14 = d15 = vdup_n_s32(0);
// load input
q3 = vld1q_s16(input);
vst1q_s16(input, qEmpty);
input += 8;
q4 = vld1q_s16(input);
vst1q_s16(input, qEmpty);
// load dq
q5 = vld1q_s16(dq);
dq += 8;
q6 = vld1q_s16(dq);
// load src from dst
dst0 = dst;
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
dst0 += stride;
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
dst0 += stride;
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
dst0 += stride;
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
vreinterpretq_u16_s16(q5)));
q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
vreinterpretq_u16_s16(q6)));
d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
q3 = vshrq_n_s16(q3, 1);
q4 = vshrq_n_s16(q4, 1);
q3 = vqaddq_s16(q3, q2);
q4 = vqaddq_s16(q4, q2);
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
vreinterpret_s16_s32(d2tmp1.val[0]));
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
vreinterpret_s16_s32(d2tmp1.val[1]));
// loop 2
q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
q3 = vshrq_n_s16(q3, 1);
q4 = vshrq_n_s16(q4, 1);
q3 = vqaddq_s16(q3, q2);
q4 = vqaddq_s16(q4, q2);
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vrshr_n_s16(d2, 3);
d3 = vrshr_n_s16(d3, 3);
d4 = vrshr_n_s16(d4, 3);
d5 = vrshr_n_s16(d5, 3);
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
vreinterpret_s16_s32(d2tmp1.val[0]));
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
vreinterpret_s16_s32(d2tmp1.val[1]));
q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
vreinterpret_u8_s32(d14)));
q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
vreinterpret_u8_s32(d15)));
d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
dst0 = dst;
vst1_lane_s32((int32_t *)dst0, d14, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d14, 1);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d15, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d15, 1);
return;
}


@ -1,25 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "vp8/common/blockd.h"
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
int16x8x2_t qQ, qDQC, qDQ;
qQ = vld2q_s16(d->qcoeff);
qDQC = vld2q_s16(DQC);
qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
vst2q_s16(d->dqcoeff, qDQ);
}


@ -1,96 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
/* place these declarations here because we don't want to maintain them
* outside of this scope
*/
void idct_dequant_full_2x_neon(short *q, short *dq,
unsigned char *dst, int stride);
void idct_dequant_0_2x_neon(short *q, short dq,
unsigned char *dst, int stride);
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
unsigned char *dst,
int stride, char *eobs)
{
int i;
for (i = 0; i < 4; i++)
{
if (((short *)(eobs))[0])
{
if (((short *)eobs)[0] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dst, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dst, stride);
}
if (((short *)(eobs))[1])
{
if (((short *)eobs)[1] & 0xfefe)
idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
else
idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
}
q += 64;
dst += 4*stride;
eobs += 4;
}
}
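/* eobs packs one end-of-block count per byte. Masking a byte pair with
 * 0xfefe clears bit 0 of each count, so a nonzero result means at least one
 * of the two 4x4 blocks has eob > 1 and needs idct_dequant_full_2x_neon;
 * otherwise only the DC terms remain and the cheaper
 * idct_dequant_0_2x_neon path suffices. */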
void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
unsigned char *dstu,
unsigned char *dstv,
int stride, char *eobs)
{
if (((short *)(eobs))[0])
{
if (((short *)eobs)[0] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstu, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
}
q += 32;
dstu += 4*stride;
if (((short *)(eobs))[1])
{
if (((short *)eobs)[1] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstu, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
}
q += 32;
if (((short *)(eobs))[2])
{
if (((short *)eobs)[2] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstv, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
}
q += 32;
dstv += 4*stride;
if (((short *)(eobs))[3])
{
if (((short *)eobs)[3] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstv, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
}
}


@ -1,63 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
void idct_dequant_0_2x_neon(
int16_t *q,
int16_t dq,
unsigned char *dst,
int stride) {
unsigned char *dst0;
int i, a0, a1;
int16x8x2_t q2Add;
int32x2_t d2s32 = vdup_n_s32(0),
d4s32 = vdup_n_s32(0);
uint8x8_t d2u8, d4u8;
uint16x8_t q1u16, q2u16;
a0 = ((q[0] * dq) + 4) >> 3;
a1 = ((q[16] * dq) + 4) >> 3;
q[0] = q[16] = 0;
q2Add.val[0] = vdupq_n_s16((int16_t)a0);
q2Add.val[1] = vdupq_n_s16((int16_t)a1);
for (i = 0; i < 2; i++, dst += 4) {
dst0 = dst;
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
dst0 += stride;
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
dst0 += stride;
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
dst0 += stride;
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
vreinterpret_u8_s32(d2s32));
q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
vreinterpret_u8_s32(d4s32));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
d2s32 = vreinterpret_s32_u8(d2u8);
d4s32 = vreinterpret_s32_u8(d4u8);
dst0 = dst;
vst1_lane_s32((int32_t *)dst0, d2s32, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d2s32, 1);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d4s32, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d4s32, 1);
}
return;
}
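
When only the DC coefficient survives, the whole 4x4 inverse transform degenerates to a constant, which is why the function above computes a single a0 = ((q[0] * dq) + 4) >> 3 per block and adds it to every pixel with a saturating narrow. A scalar sketch for one block (hypothetical name):

static void idct_dequant_0_sketch(short q0, short dq,
                                  unsigned char *dst, int stride) {
    int r, c, v;
    const int a = ((q0 * dq) + 4) >> 3;
    for (r = 0; r < 4; r++, dst += stride) {
        for (c = 0; c < 4; c++) {
            v = dst[c] + a;                                        /* vaddw_u8 */
            dst[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v); /* vqmovun_s16 */
        }
    }
}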

View File

@ -1,185 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
static const int16_t cospi8sqrt2minus1 = 20091;
// sinpi8sqrt2 is really 35468 (0x8a8c); its lowest bit is 0, so it is stored
// pre-shifted by 1 and vqdmulh's built-in doubling restores the full product.
static const int16_t sinpi8sqrt2 = 17734;
void idct_dequant_full_2x_neon(
int16_t *q,
int16_t *dq,
unsigned char *dst,
int stride) {
unsigned char *dst0, *dst1;
int32x2_t d28, d29, d30, d31;
int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
int16x8_t qEmpty = vdupq_n_s16(0);
int32x4x2_t q2tmp0, q2tmp1;
int16x8x2_t q2tmp2, q2tmp3;
int16x4_t dLow0, dLow1, dHigh0, dHigh1;
d28 = d29 = d30 = d31 = vdup_n_s32(0);
// load dq
q0 = vld1q_s16(dq);
dq += 8;
q1 = vld1q_s16(dq);
// load q
q2 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q3 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q4 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q5 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
// load src from dst
dst0 = dst;
dst1 = dst + 4;
d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
dst0 += stride;
d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
dst1 += stride;
d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
dst0 += stride;
d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
dst1 += stride;
d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
dst0 += stride;
d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
dst1 += stride;
d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
q2 = vmulq_s16(q2, q0);
q3 = vmulq_s16(q3, q1);
q4 = vmulq_s16(q4, q0);
q5 = vmulq_s16(q5, q1);
// vswp
dLow0 = vget_low_s16(q2);
dHigh0 = vget_high_s16(q2);
dLow1 = vget_low_s16(q4);
dHigh1 = vget_high_s16(q4);
q2 = vcombine_s16(dLow0, dLow1);
q4 = vcombine_s16(dHigh0, dHigh1);
dLow0 = vget_low_s16(q3);
dHigh0 = vget_high_s16(q3);
dLow1 = vget_low_s16(q5);
dHigh1 = vget_high_s16(q5);
q3 = vcombine_s16(dLow0, dLow1);
q5 = vcombine_s16(dHigh0, dHigh1);
q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
q10 = vqaddq_s16(q2, q3);
q11 = vqsubq_s16(q2, q3);
q8 = vshrq_n_s16(q8, 1);
q9 = vshrq_n_s16(q9, 1);
q4 = vqaddq_s16(q4, q8);
q5 = vqaddq_s16(q5, q9);
q2 = vqsubq_s16(q6, q5);
q3 = vqaddq_s16(q7, q4);
q4 = vqaddq_s16(q10, q3);
q5 = vqaddq_s16(q11, q2);
q6 = vqsubq_s16(q11, q2);
q7 = vqsubq_s16(q10, q3);
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
vreinterpretq_s16_s32(q2tmp1.val[0]));
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
vreinterpretq_s16_s32(q2tmp1.val[1]));
// loop 2
q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
q10 = vshrq_n_s16(q10, 1);
q11 = vshrq_n_s16(q11, 1);
q10 = vqaddq_s16(q2tmp2.val[1], q10);
q11 = vqaddq_s16(q2tmp3.val[1], q11);
q8 = vqsubq_s16(q8, q11);
q9 = vqaddq_s16(q9, q10);
q4 = vqaddq_s16(q2, q9);
q5 = vqaddq_s16(q3, q8);
q6 = vqsubq_s16(q3, q8);
q7 = vqsubq_s16(q2, q9);
q4 = vrshrq_n_s16(q4, 3);
q5 = vrshrq_n_s16(q5, 3);
q6 = vrshrq_n_s16(q6, 3);
q7 = vrshrq_n_s16(q7, 3);
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
vreinterpretq_s16_s32(q2tmp1.val[0]));
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
vreinterpretq_s16_s32(q2tmp1.val[1]));
q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
vreinterpret_u8_s32(d28)));
q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
vreinterpret_u8_s32(d29)));
q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
vreinterpret_u8_s32(d30)));
q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
vreinterpret_u8_s32(d31)));
d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
dst0 = dst;
dst1 = dst + 4;
vst1_lane_s32((int32_t *)dst0, d28, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d28, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d29, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d29, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d30, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d30, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d31, 0);
vst1_lane_s32((int32_t *)dst1, d31, 1);
return;
}
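
The constants only make sense together with vqdmulhq_n_s16, which returns (2 * a * b) >> 16: that built-in doubling is why sinpi8sqrt2 is stored pre-shifted as 17734 (half of 35468 = 0x8a8c), while the 20091 products get an explicit vshrq_n_s16(..., 1) before being added back. Written out in scalar form, ignoring saturation (an observation, not part of the original file):

/* what the intrinsic sequences compute for one coefficient x */
static int mul_sin(int x)  { return (2 * x * 17734) >> 16; }        /* == (x * 35468) >> 16 */
static int mul_cos1(int x) { return ((2 * x * 20091) >> 16) >> 1; } /* == (x * 20091) >> 16 */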

View File

@ -1,102 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
void vp8_short_inv_walsh4x4_neon(
int16_t *input,
int16_t *mb_dqcoeff) {
int16x8_t q0s16, q1s16, q2s16, q3s16;
int16x4_t d4s16, d5s16, d6s16, d7s16;
int16x4x2_t v2tmp0, v2tmp1;
int32x2x2_t v2tmp2, v2tmp3;
int16x8_t qAdd3;
q0s16 = vld1q_s16(input);
q1s16 = vld1q_s16(input + 8);
// 1st for loop
d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
q2s16 = vcombine_s16(d4s16, d5s16);
q3s16 = vcombine_s16(d6s16, d7s16);
q0s16 = vaddq_s16(q2s16, q3s16);
q1s16 = vsubq_s16(q2s16, q3s16);
v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
vreinterpret_s32_s16(vget_low_s16(q1s16)));
v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
vreinterpret_s32_s16(vget_high_s16(q1s16)));
v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
vreinterpret_s16_s32(v2tmp3.val[0]));
v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
vreinterpret_s16_s32(v2tmp3.val[1]));
// 2nd for loop
d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
q2s16 = vcombine_s16(d4s16, d5s16);
q3s16 = vcombine_s16(d6s16, d7s16);
qAdd3 = vdupq_n_s16(3);
q0s16 = vaddq_s16(q2s16, q3s16);
q1s16 = vsubq_s16(q2s16, q3s16);
q0s16 = vaddq_s16(q0s16, qAdd3);
q1s16 = vaddq_s16(q1s16, qAdd3);
q0s16 = vshrq_n_s16(q0s16, 3);
q1s16 = vshrq_n_s16(q1s16, 3);
// store
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
mb_dqcoeff += 16;
return;
}
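
For reference, this is the inverse 4x4 Walsh-Hadamard transform of the Y2 (second-order DC) block: the two vectorized passes above are a column pass and a row pass, and each of the 16 results becomes the DC term of one dequantized 4x4 block, which is why the stores step through mb_dqcoeff sixteen entries at a time. A scalar sketch of the same transform (hypothetical name, matching the reference C behaviour):

static void inv_walsh_sketch(const short *in, short *mb_dqcoeff) {
    short tmp[16];
    const short *ip;
    int i, a1, b1, c1, d1;
    for (i = 0; i < 4; i++) {                   /* column pass */
        a1 = in[i]     + in[12 + i];
        b1 = in[4 + i] + in[8 + i];
        c1 = in[4 + i] - in[8 + i];
        d1 = in[i]     - in[12 + i];
        tmp[i]      = (short)(a1 + b1);
        tmp[4 + i]  = (short)(c1 + d1);
        tmp[8 + i]  = (short)(a1 - b1);
        tmp[12 + i] = (short)(d1 - c1);
    }
    for (i = 0; i < 4; i++) {                   /* row pass with rounding */
        ip = tmp + 4 * i;
        a1 = ip[0] + ip[3];
        b1 = ip[1] + ip[2];
        c1 = ip[1] - ip[2];
        d1 = ip[0] - ip[3];
        /* each result is the DC of one 4x4 block, 16 coefficients apart */
        mb_dqcoeff[(4 * i + 0) * 16] = (short)((a1 + b1 + 3) >> 3);
        mb_dqcoeff[(4 * i + 1) * 16] = (short)((c1 + d1 + 3) >> 3);
        mb_dqcoeff[(4 * i + 2) * 16] = (short)((a1 - b1 + 3) >> 3);
        mb_dqcoeff[(4 * i + 3) * 16] = (short)((d1 - c1 + 3) >> 3);
    }
}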

View File

@ -1,111 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "./vpx_config.h"
static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
unsigned char *s,
int p,
const unsigned char *blimit) {
uint8_t *sp;
uint8x16_t qblimit, q0u8;
uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
int16x8_t q2s16, q3s16, q13s16;
int8x8_t d8s8, d9s8;
int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
qblimit = vdupq_n_u8(*blimit);
sp = s - (p << 1);
q5u8 = vld1q_u8(sp);
sp += p;
q6u8 = vld1q_u8(sp);
sp += p;
q7u8 = vld1q_u8(sp);
sp += p;
q8u8 = vld1q_u8(sp);
q15u8 = vabdq_u8(q6u8, q7u8);
q14u8 = vabdq_u8(q5u8, q8u8);
q15u8 = vqaddq_u8(q15u8, q15u8);
q14u8 = vshrq_n_u8(q14u8, 1);
q0u8 = vdupq_n_u8(0x80);
q13s16 = vdupq_n_s16(3);
q15u8 = vqaddq_u8(q15u8, q14u8);
q5u8 = veorq_u8(q5u8, q0u8);
q6u8 = veorq_u8(q6u8, q0u8);
q7u8 = veorq_u8(q7u8, q0u8);
q8u8 = veorq_u8(q8u8, q0u8);
q15u8 = vcgeq_u8(qblimit, q15u8);
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
vreinterpretq_s8_u8(q8u8));
q2s16 = vmulq_s16(q2s16, q13s16);
q3s16 = vmulq_s16(q3s16, q13s16);
q10u8 = vdupq_n_u8(3);
q9u8 = vdupq_n_u8(4);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
d8s8 = vqmovn_s16(q2s16);
d9s8 = vqmovn_s16(q3s16);
q4s8 = vcombine_s8(d8s8, d9s8);
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
q2s8 = vshrq_n_s8(q2s8, 3);
q3s8 = vshrq_n_s8(q3s8, 3);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
vst1q_u8(s, q7u8);
s -= p;
vst1q_u8(s, q6u8);
return;
}
void vp8_loop_filter_bhs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
return;
}
void vp8_loop_filter_mbhs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
return;
}
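
The arithmetic above is VP8's "simple" loop filter: a flatness mask, 2*|p0-q0| + |p1-q1|/2 <= blimit, gates a single correction built from 3*(q0-p0) + (p1-q1), which is applied with asymmetric +4/+3 rounding so the two sides of the edge round consistently. A scalar sketch for one column of pixels, assuming the usual ^0x80 mapping into signed range (hypothetical names):

static signed char sclamp(int t) {
    return (signed char)(t < -128 ? -128 : t > 127 ? 127 : t);
}
static void simple_filter(unsigned char *p1, unsigned char *p0,
                          unsigned char *q0, unsigned char *q1,
                          unsigned char blimit) {
    int d0 = *p0 - *q0, d1 = *p1 - *q1;
    signed char ps1, ps0, qs0, qs1, f, f1, f2;
    if (d0 < 0) d0 = -d0;
    if (d1 < 0) d1 = -d1;
    if (d0 * 2 + d1 / 2 > blimit)            /* vcgeq_u8(qblimit, ...) mask */
        return;
    ps1 = (signed char)(*p1 ^ 0x80);
    ps0 = (signed char)(*p0 ^ 0x80);
    qs0 = (signed char)(*q0 ^ 0x80);
    qs1 = (signed char)(*q1 ^ 0x80);
    f  = sclamp(ps1 - qs1);
    f  = sclamp(f + 3 * (qs0 - ps0));
    f1 = (signed char)(sclamp(f + 4) >> 3);  /* subtracted from q0 */
    f2 = (signed char)(sclamp(f + 3) >> 3);  /* added to p0 */
    *q0 = (unsigned char)(sclamp(qs0 - f1) ^ 0x80);
    *p0 = (unsigned char)(sclamp(ps0 + f2) ^ 0x80);
}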

View File

@ -1,283 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "./vpx_config.h"
#include "vpx_ports/arm.h"
#ifdef VPX_INCOMPATIBLE_GCC
static INLINE void write_2x4(unsigned char *dst, int pitch,
const uint8x8x2_t result) {
/*
* uint8x8x2_t result
00 01 02 03 | 04 05 06 07
10 11 12 13 | 14 15 16 17
---
* after vtrn_u8
00 10 02 12 | 04 14 06 16
01 11 03 13 | 05 15 07 17
*/
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0],
result.val[1]);
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
}
static INLINE void write_2x8(unsigned char *dst, int pitch,
const uint8x8x2_t result,
const uint8x8x2_t result2) {
write_2x4(dst, pitch, result);
dst += pitch * 8;
write_2x4(dst, pitch, result2);
}
#else
static INLINE void write_2x8(unsigned char *dst, int pitch,
const uint8x8x2_t result,
const uint8x8x2_t result2) {
vst2_lane_u8(dst, result, 0);
dst += pitch;
vst2_lane_u8(dst, result, 1);
dst += pitch;
vst2_lane_u8(dst, result, 2);
dst += pitch;
vst2_lane_u8(dst, result, 3);
dst += pitch;
vst2_lane_u8(dst, result, 4);
dst += pitch;
vst2_lane_u8(dst, result, 5);
dst += pitch;
vst2_lane_u8(dst, result, 6);
dst += pitch;
vst2_lane_u8(dst, result, 7);
dst += pitch;
vst2_lane_u8(dst, result2, 0);
dst += pitch;
vst2_lane_u8(dst, result2, 1);
dst += pitch;
vst2_lane_u8(dst, result2, 2);
dst += pitch;
vst2_lane_u8(dst, result2, 3);
dst += pitch;
vst2_lane_u8(dst, result2, 4);
dst += pitch;
vst2_lane_u8(dst, result2, 5);
dst += pitch;
vst2_lane_u8(dst, result2, 6);
dst += pitch;
vst2_lane_u8(dst, result2, 7);
}
#endif // VPX_INCOMPATIBLE_GCC
#ifdef VPX_INCOMPATIBLE_GCC
static INLINE
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
uint8x8x4_t x;
const uint8x8_t a = vld1_u8(src);
const uint8x8_t b = vld1_u8(src + pitch * 1);
const uint8x8_t c = vld1_u8(src + pitch * 2);
const uint8x8_t d = vld1_u8(src + pitch * 3);
const uint8x8_t e = vld1_u8(src + pitch * 4);
const uint8x8_t f = vld1_u8(src + pitch * 5);
const uint8x8_t g = vld1_u8(src + pitch * 6);
const uint8x8_t h = vld1_u8(src + pitch * 7);
const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a),
vreinterpret_u32_u8(e));
const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b),
vreinterpret_u32_u8(f));
const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c),
vreinterpret_u32_u8(g));
const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d),
vreinterpret_u32_u8(h));
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
vreinterpret_u16_u32(r26_u32.val[0]));
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
vreinterpret_u16_u32(r37_u32.val[0]));
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
vreinterpret_u8_u16(r13_u16.val[0]));
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
vreinterpret_u8_u16(r13_u16.val[1]));
/*
* after vtrn_u32
00 01 02 03 | 40 41 42 43
10 11 12 13 | 50 51 52 53
20 21 22 23 | 60 61 62 63
30 31 32 33 | 70 71 72 73
---
* after vtrn_u16
00 01 20 21 | 40 41 60 61
02 03 22 23 | 42 43 62 63
10 11 30 31 | 50 51 70 71
12 13 32 33 | 52 53 72 73
00 01 20 21 | 40 41 60 61
10 11 30 31 | 50 51 70 71
02 03 22 23 | 42 43 62 63
12 13 32 33 | 52 53 72 73
---
* after vtrn_u8
00 10 20 30 | 40 50 60 70
01 11 21 31 | 41 51 61 71
02 12 22 32 | 42 52 62 72
03 13 23 33 | 43 53 63 73
*/
x.val[0] = r01_u8.val[0];
x.val[1] = r01_u8.val[1];
x.val[2] = r23_u8.val[0];
x.val[3] = r23_u8.val[1];
return x;
}
#else
static INLINE
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
uint8x8x4_t x;
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
x = vld4_lane_u8(src, x, 0);
src += pitch;
x = vld4_lane_u8(src, x, 1);
src += pitch;
x = vld4_lane_u8(src, x, 2);
src += pitch;
x = vld4_lane_u8(src, x, 3);
src += pitch;
x = vld4_lane_u8(src, x, 4);
src += pitch;
x = vld4_lane_u8(src, x, 5);
src += pitch;
x = vld4_lane_u8(src, x, 6);
src += pitch;
x = vld4_lane_u8(src, x, 7);
return x;
}
#endif // VPX_INCOMPATIBLE_GCC
static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
unsigned char *s,
int p,
const unsigned char *blimit) {
unsigned char *src1;
uint8x16_t qblimit, q0u8;
uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
int16x8_t q2s16, q13s16, q11s16;
int8x8_t d28s8, d29s8;
int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
uint8x8x4_t d0u8x4; // d6, d7, d8, d9
uint8x8x4_t d1u8x4; // d10, d11, d12, d13
uint8x8x2_t d2u8x2; // d12, d13
uint8x8x2_t d3u8x2; // d14, d15
qblimit = vdupq_n_u8(*blimit);
src1 = s - 2;
d0u8x4 = read_4x8(src1, p);
src1 += p * 8;
d1u8x4 = read_4x8(src1, p);
q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10
q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12
q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11
q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13
q15u8 = vabdq_u8(q5u8, q4u8);
q14u8 = vabdq_u8(q3u8, q6u8);
q15u8 = vqaddq_u8(q15u8, q15u8);
q14u8 = vshrq_n_u8(q14u8, 1);
q0u8 = vdupq_n_u8(0x80);
q11s16 = vdupq_n_s16(3);
q15u8 = vqaddq_u8(q15u8, q14u8);
q3u8 = veorq_u8(q3u8, q0u8);
q4u8 = veorq_u8(q4u8, q0u8);
q5u8 = veorq_u8(q5u8, q0u8);
q6u8 = veorq_u8(q6u8, q0u8);
q15u8 = vcgeq_u8(qblimit, q15u8);
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
vget_low_s8(vreinterpretq_s8_u8(q5u8)));
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
vget_high_s8(vreinterpretq_s8_u8(q5u8)));
q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8),
vreinterpretq_s8_u8(q6u8));
q2s16 = vmulq_s16(q2s16, q11s16);
q13s16 = vmulq_s16(q13s16, q11s16);
q11u8 = vdupq_n_u8(3);
q12u8 = vdupq_n_u8(4);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
d28s8 = vqmovn_s16(q2s16);
d29s8 = vqmovn_s16(q13s16);
q14s8 = vcombine_s8(d28s8, d29s8);
q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
q2s8 = vshrq_n_s8(q2s8, 3);
q14s8 = vshrq_n_s8(q3s8, 3);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
d2u8x2.val[0] = vget_low_u8(q6u8); // d12
d2u8x2.val[1] = vget_low_u8(q7u8); // d14
d3u8x2.val[0] = vget_high_u8(q6u8); // d13
d3u8x2.val[1] = vget_high_u8(q7u8); // d15
src1 = s - 1;
write_2x8(src1, p, d2u8x2, d3u8x2);
}
void vp8_loop_filter_bvs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
return;
}
void vp8_loop_filter_mbvs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
return;
}
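
The vertical-edge variants reuse exactly the same filter math; the new work is data movement. read_4x8 (vld4_lane_u8, or the transposing fallback for incompatible compilers) gathers the four pixels straddling the edge from eight consecutive rows so that each NEON lane holds one row, and write_2x8 (vst2_lane_u8) scatters the two changed pixels back. A scalar sketch of what the gather produces (hypothetical helper):

static void gather_4x8(const unsigned char *src, int pitch,
                       unsigned char cols[4][8]) {
    int r, c;
    for (r = 0; r < 8; r++)
        for (c = 0; c < 4; c++)
            cols[c][r] = src[r * pitch + c];   /* cols[c] == lane set c */
}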

View File

@ -1,625 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "./vpx_config.h"
static INLINE void vp8_mbloop_filter_neon(
uint8x16_t qblimit, // mblimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
uint8x16_t q3, // p3
uint8x16_t q4, // p2
uint8x16_t q5, // p1
uint8x16_t q6, // p0
uint8x16_t q7, // q0
uint8x16_t q8, // q1
uint8x16_t q9, // q2
uint8x16_t q10, // q3
uint8x16_t *q4r, // p2
uint8x16_t *q5r, // p1
uint8x16_t *q6r, // p0
uint8x16_t *q7r, // q0
uint8x16_t *q8r, // q1
uint8x16_t *q9r) { // q2
uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8;
int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16;
int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8;
uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16;
int8x16_t q0s8, q12s8, q14s8, q15s8;
int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29;
q11u8 = vabdq_u8(q3, q4);
q12u8 = vabdq_u8(q4, q5);
q13u8 = vabdq_u8(q5, q6);
q14u8 = vabdq_u8(q8, q7);
q1u8 = vabdq_u8(q9, q8);
q0u8 = vabdq_u8(q10, q9);
q11u8 = vmaxq_u8(q11u8, q12u8);
q12u8 = vmaxq_u8(q13u8, q14u8);
q1u8 = vmaxq_u8(q1u8, q0u8);
q15u8 = vmaxq_u8(q11u8, q12u8);
q12u8 = vabdq_u8(q6, q7);
// vp8_hevmask
q13u8 = vcgtq_u8(q13u8, qthresh);
q14u8 = vcgtq_u8(q14u8, qthresh);
q15u8 = vmaxq_u8(q15u8, q1u8);
q15u8 = vcgeq_u8(qlimit, q15u8);
q1u8 = vabdq_u8(q5, q8);
q12u8 = vqaddq_u8(q12u8, q12u8);
// vp8_filter() function
// convert to signed
q0u8 = vdupq_n_u8(0x80);
q9 = veorq_u8(q9, q0u8);
q8 = veorq_u8(q8, q0u8);
q7 = veorq_u8(q7, q0u8);
q6 = veorq_u8(q6, q0u8);
q5 = veorq_u8(q5, q0u8);
q4 = veorq_u8(q4, q0u8);
q1u8 = vshrq_n_u8(q1u8, 1);
q12u8 = vqaddq_u8(q12u8, q1u8);
q14u8 = vorrq_u8(q13u8, q14u8);
q12u8 = vcgeq_u8(qblimit, q12u8);
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
vget_low_s8(vreinterpretq_s8_u8(q6)));
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
vget_high_s8(vreinterpretq_s8_u8(q6)));
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
vreinterpretq_s8_u8(q8));
q11s16 = vdupq_n_s16(3);
q2s16 = vmulq_s16(q2s16, q11s16);
q13s16 = vmulq_s16(q13s16, q11s16);
q15u8 = vandq_u8(q15u8, q12u8);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8));
q12u8 = vdupq_n_u8(3);
q11u8 = vdupq_n_u8(4);
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
d2 = vqmovn_s16(q2s16);
d3 = vqmovn_s16(q13s16);
q1s8 = vcombine_s8(d2, d3);
q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8));
q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8));
q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8));
q2s8 = vshrq_n_s8(q2s8, 3);
q13s8 = vshrq_n_s8(q13s8, 3);
q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8);
q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8);
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63);
d5 = vdup_n_s8(9);
d4 = vdup_n_s8(18);
q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5);
q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5);
d5 = vdup_n_s8(27);
q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4);
q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4);
q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5);
q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5);
d0 = vqshrn_n_s16(q0s16, 7);
d1 = vqshrn_n_s16(q11s16, 7);
d24 = vqshrn_n_s16(q12s16, 7);
d25 = vqshrn_n_s16(q13s16, 7);
d28 = vqshrn_n_s16(q14s16, 7);
d29 = vqshrn_n_s16(q15s16, 7);
q0s8 = vcombine_s8(d0, d1);
q12s8 = vcombine_s8(d24, d25);
q14s8 = vcombine_s8(d28, d29);
q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8);
q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8);
q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8);
q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8);
q15s8 = vqsubq_s8((q7s8), q14s8);
q14s8 = vqaddq_s8((q6s8), q14s8);
q1u8 = vdupq_n_u8(0x80);
*q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8);
*q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8);
*q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8);
*q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8);
*q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8);
*q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8);
return;
}
void vp8_mbloop_filter_horizontal_edge_y_neon(
unsigned char *src,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
src -= (pitch << 2);
q3 = vld1q_u8(src);
src += pitch;
q4 = vld1q_u8(src);
src += pitch;
q5 = vld1q_u8(src);
src += pitch;
q6 = vld1q_u8(src);
src += pitch;
q7 = vld1q_u8(src);
src += pitch;
q8 = vld1q_u8(src);
src += pitch;
q9 = vld1q_u8(src);
src += pitch;
q10 = vld1q_u8(src);
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q4, &q5, &q6, &q7, &q8, &q9);
src -= (pitch * 6);
vst1q_u8(src, q4);
src += pitch;
vst1q_u8(src, q5);
src += pitch;
vst1q_u8(src, q6);
src += pitch;
vst1q_u8(src, q7);
src += pitch;
vst1q_u8(src, q8);
src += pitch;
vst1q_u8(src, q9);
return;
}
void vp8_mbloop_filter_horizontal_edge_uv_neon(
unsigned char *u,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
u -= (pitch << 2);
v -= (pitch << 2);
d6 = vld1_u8(u);
u += pitch;
d7 = vld1_u8(v);
v += pitch;
d8 = vld1_u8(u);
u += pitch;
d9 = vld1_u8(v);
v += pitch;
d10 = vld1_u8(u);
u += pitch;
d11 = vld1_u8(v);
v += pitch;
d12 = vld1_u8(u);
u += pitch;
d13 = vld1_u8(v);
v += pitch;
d14 = vld1_u8(u);
u += pitch;
d15 = vld1_u8(v);
v += pitch;
d16 = vld1_u8(u);
u += pitch;
d17 = vld1_u8(v);
v += pitch;
d18 = vld1_u8(u);
u += pitch;
d19 = vld1_u8(v);
v += pitch;
d20 = vld1_u8(u);
d21 = vld1_u8(v);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q4, &q5, &q6, &q7, &q8, &q9);
u -= (pitch * 6);
v -= (pitch * 6);
vst1_u8(u, vget_low_u8(q4));
u += pitch;
vst1_u8(v, vget_high_u8(q4));
v += pitch;
vst1_u8(u, vget_low_u8(q5));
u += pitch;
vst1_u8(v, vget_high_u8(q5));
v += pitch;
vst1_u8(u, vget_low_u8(q6));
u += pitch;
vst1_u8(v, vget_high_u8(q6));
v += pitch;
vst1_u8(u, vget_low_u8(q7));
u += pitch;
vst1_u8(v, vget_high_u8(q7));
v += pitch;
vst1_u8(u, vget_low_u8(q8));
u += pitch;
vst1_u8(v, vget_high_u8(q8));
v += pitch;
vst1_u8(u, vget_low_u8(q9));
vst1_u8(v, vget_high_u8(q9));
return;
}
void vp8_mbloop_filter_vertical_edge_y_neon(
unsigned char *src,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
unsigned char *s1, *s2;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
s1 = src - 4;
s2 = s1 + 8 * pitch;
d6 = vld1_u8(s1);
s1 += pitch;
d7 = vld1_u8(s2);
s2 += pitch;
d8 = vld1_u8(s1);
s1 += pitch;
d9 = vld1_u8(s2);
s2 += pitch;
d10 = vld1_u8(s1);
s1 += pitch;
d11 = vld1_u8(s2);
s2 += pitch;
d12 = vld1_u8(s1);
s1 += pitch;
d13 = vld1_u8(s2);
s2 += pitch;
d14 = vld1_u8(s1);
s1 += pitch;
d15 = vld1_u8(s2);
s2 += pitch;
d16 = vld1_u8(s1);
s1 += pitch;
d17 = vld1_u8(s2);
s2 += pitch;
d18 = vld1_u8(s1);
s1 += pitch;
d19 = vld1_u8(s2);
s2 += pitch;
d20 = vld1_u8(s1);
d21 = vld1_u8(s2);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q4, &q5, &q6, &q7, &q8, &q9);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
s1 -= 7 * pitch;
s2 -= 7 * pitch;
vst1_u8(s1, vget_low_u8(q3));
s1 += pitch;
vst1_u8(s2, vget_high_u8(q3));
s2 += pitch;
vst1_u8(s1, vget_low_u8(q4));
s1 += pitch;
vst1_u8(s2, vget_high_u8(q4));
s2 += pitch;
vst1_u8(s1, vget_low_u8(q5));
s1 += pitch;
vst1_u8(s2, vget_high_u8(q5));
s2 += pitch;
vst1_u8(s1, vget_low_u8(q6));
s1 += pitch;
vst1_u8(s2, vget_high_u8(q6));
s2 += pitch;
vst1_u8(s1, vget_low_u8(q7));
s1 += pitch;
vst1_u8(s2, vget_high_u8(q7));
s2 += pitch;
vst1_u8(s1, vget_low_u8(q8));
s1 += pitch;
vst1_u8(s2, vget_high_u8(q8));
s2 += pitch;
vst1_u8(s1, vget_low_u8(q9));
s1 += pitch;
vst1_u8(s2, vget_high_u8(q9));
s2 += pitch;
vst1_u8(s1, vget_low_u8(q10));
vst1_u8(s2, vget_high_u8(q10));
return;
}
void vp8_mbloop_filter_vertical_edge_uv_neon(
unsigned char *u,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
unsigned char *us, *ud;
unsigned char *vs, *vd;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
us = u - 4;
vs = v - 4;
d6 = vld1_u8(us);
us += pitch;
d7 = vld1_u8(vs);
vs += pitch;
d8 = vld1_u8(us);
us += pitch;
d9 = vld1_u8(vs);
vs += pitch;
d10 = vld1_u8(us);
us += pitch;
d11 = vld1_u8(vs);
vs += pitch;
d12 = vld1_u8(us);
us += pitch;
d13 = vld1_u8(vs);
vs += pitch;
d14 = vld1_u8(us);
us += pitch;
d15 = vld1_u8(vs);
vs += pitch;
d16 = vld1_u8(us);
us += pitch;
d17 = vld1_u8(vs);
vs += pitch;
d18 = vld1_u8(us);
us += pitch;
d19 = vld1_u8(vs);
vs += pitch;
d20 = vld1_u8(us);
d21 = vld1_u8(vs);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q4, &q5, &q6, &q7, &q8, &q9);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
ud = u - 4;
vst1_u8(ud, vget_low_u8(q3));
ud += pitch;
vst1_u8(ud, vget_low_u8(q4));
ud += pitch;
vst1_u8(ud, vget_low_u8(q5));
ud += pitch;
vst1_u8(ud, vget_low_u8(q6));
ud += pitch;
vst1_u8(ud, vget_low_u8(q7));
ud += pitch;
vst1_u8(ud, vget_low_u8(q8));
ud += pitch;
vst1_u8(ud, vget_low_u8(q9));
ud += pitch;
vst1_u8(ud, vget_low_u8(q10));
vd = v - 4;
vst1_u8(vd, vget_high_u8(q3));
vd += pitch;
vst1_u8(vd, vget_high_u8(q4));
vd += pitch;
vst1_u8(vd, vget_high_u8(q5));
vd += pitch;
vst1_u8(vd, vget_high_u8(q6));
vd += pitch;
vst1_u8(vd, vget_high_u8(q7));
vd += pitch;
vst1_u8(vd, vget_high_u8(q8));
vd += pitch;
vst1_u8(vd, vget_high_u8(q9));
vd += pitch;
vst1_u8(vd, vget_high_u8(q10));
return;
}
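
The vmlal_s8 runs with 9, 18 and 27 against a bias of 63, followed by vqshrn_n_s16(..., 7), implement the wide taps of VP8's macroblock filter: the pixel pairs one, two and three samples from the edge move by roughly 3/7, 2/7 and 1/7 of the filtered boundary difference, and only where the high-edge-variance mask is clear. A scalar sketch of that stage (hypothetical names; w is the filter value after the & ~hev masking, so the shifted taps already fit in [-27, 27]):

static signed char sclamp(int t) {
    return (signed char)(t < -128 ? -128 : t > 127 ? 127 : t);
}
static void mb_wide_taps(int w, signed char *p2, signed char *p1,
                         signed char *p0, signed char *q0,
                         signed char *q1, signed char *q2) {
    int u;
    u = (63 + w * 27) >> 7;        /* ~3/7 of the boundary difference */
    *q0 = sclamp(*q0 - u);
    *p0 = sclamp(*p0 + u);
    u = (63 + w * 18) >> 7;        /* ~2/7 */
    *q1 = sclamp(*q1 - u);
    *p1 = sclamp(*p1 + u);
    u = (63 + w * 9) >> 7;         /* ~1/7 */
    *q2 = sclamp(*q2 - u);
    *p2 = sclamp(*p2 + u);
}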

View File

@ -1,123 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 35468;
void vp8_short_idct4x4llm_neon(
int16_t *input,
unsigned char *pred_ptr,
int pred_stride,
unsigned char *dst_ptr,
int dst_stride) {
int i;
uint32x2_t d6u32 = vdup_n_u32(0);
uint8x8_t d1u8;
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
uint16x8_t q1u16;
int16x8_t q1s16, q2s16, q3s16, q4s16;
int32x2x2_t v2tmp0, v2tmp1;
int16x4x2_t v2tmp2, v2tmp3;
d2 = vld1_s16(input);
d3 = vld1_s16(input + 4);
d4 = vld1_s16(input + 8);
d5 = vld1_s16(input + 12);
// 1st for loop
q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here
q2s16 = vcombine_s16(d3, d5);
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
q3s16 = vshrq_n_s16(q3s16, 1);
q4s16 = vshrq_n_s16(q4s16, 1);
q3s16 = vqaddq_s16(q3s16, q2s16);
q4s16 = vqaddq_s16(q4s16, q2s16);
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
vreinterpret_s16_s32(v2tmp1.val[0]));
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
vreinterpret_s16_s32(v2tmp1.val[1]));
// 2nd for loop
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
q3s16 = vshrq_n_s16(q3s16, 1);
q4s16 = vshrq_n_s16(q4s16, 1);
q3s16 = vqaddq_s16(q3s16, q2s16);
q4s16 = vqaddq_s16(q4s16, q2s16);
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vrshr_n_s16(d2, 3);
d3 = vrshr_n_s16(d3, 3);
d4 = vrshr_n_s16(d4, 3);
d5 = vrshr_n_s16(d5, 3);
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
vreinterpret_s16_s32(v2tmp1.val[0]));
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
vreinterpret_s16_s32(v2tmp1.val[1]));
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
// dc_only_idct_add
for (i = 0; i < 2; i++, q1s16 = q2s16) {
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
pred_ptr += pred_stride;
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
pred_ptr += pred_stride;
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
vreinterpret_u8_u32(d6u32));
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
dst_ptr += dst_stride;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
dst_ptr += dst_stride;
}
return;
}
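
Two observations on the constants here (not from the original file): unlike the dequant variant, sinpi8sqrt2 keeps its full value 35468, which wraps to -30068 when stored in an int16_t; the code compensates by always adding the input back (the vqaddq_s16(q3s16, q2s16) after the halving shift), since (x * 35468) >> 16 == x + ((x * -30068) >> 16). The per-column butterfly both passes implement is the standard VP8 one; a scalar reference (hypothetical name, following vp8_short_idct4x4llm_c):

static void idct_col(const int in[4], int out[4]) {
    int a1, b1, c1, d1, t1, t2;
    a1 = in[0] + in[2];
    b1 = in[0] - in[2];
    t1 = (in[1] * 35468) >> 16;              /* sinpi8sqrt2 term */
    t2 = in[3] + ((in[3] * 20091) >> 16);    /* cospi8sqrt2minus1 term */
    c1 = t1 - t2;
    t1 = in[1] + ((in[1] * 20091) >> 16);
    t2 = (in[3] * 35468) >> 16;
    d1 = t1 + t2;
    out[0] = a1 + d1;
    out[1] = b1 + c1;
    out[2] = b1 - c1;
    out[3] = a1 - d1;
}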

File diff suppressed because it is too large. Load Diff

View File

@ -1,550 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "./vpx_config.h"
#include "vpx_ports/arm.h"
static INLINE void vp8_loop_filter_neon(
uint8x16_t qblimit, // flimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
uint8x16_t q3, // p3
uint8x16_t q4, // p2
uint8x16_t q5, // p1
uint8x16_t q6, // p0
uint8x16_t q7, // q0
uint8x16_t q8, // q1
uint8x16_t q9, // q2
uint8x16_t q10, // q3
uint8x16_t *q5r, // p1
uint8x16_t *q6r, // p0
uint8x16_t *q7r, // q0
uint8x16_t *q8r) { // q1
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
int16x8_t q2s16, q11s16;
uint16x8_t q4u16;
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
int8x8_t d2s8, d3s8;
q11u8 = vabdq_u8(q3, q4);
q12u8 = vabdq_u8(q4, q5);
q13u8 = vabdq_u8(q5, q6);
q14u8 = vabdq_u8(q8, q7);
q3 = vabdq_u8(q9, q8);
q4 = vabdq_u8(q10, q9);
q11u8 = vmaxq_u8(q11u8, q12u8);
q12u8 = vmaxq_u8(q13u8, q14u8);
q3 = vmaxq_u8(q3, q4);
q15u8 = vmaxq_u8(q11u8, q12u8);
q9 = vabdq_u8(q6, q7);
// vp8_hevmask
q13u8 = vcgtq_u8(q13u8, qthresh);
q14u8 = vcgtq_u8(q14u8, qthresh);
q15u8 = vmaxq_u8(q15u8, q3);
q2u8 = vabdq_u8(q5, q8);
q9 = vqaddq_u8(q9, q9);
q15u8 = vcgeq_u8(qlimit, q15u8);
// vp8_filter() function
// convert to signed
q10 = vdupq_n_u8(0x80);
q8 = veorq_u8(q8, q10);
q7 = veorq_u8(q7, q10);
q6 = veorq_u8(q6, q10);
q5 = veorq_u8(q5, q10);
q2u8 = vshrq_n_u8(q2u8, 1);
q9 = vqaddq_u8(q9, q2u8);
q10 = vdupq_n_u8(3);
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
vget_low_s8(vreinterpretq_s8_u8(q6)));
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
vget_high_s8(vreinterpretq_s8_u8(q6)));
q9 = vcgeq_u8(qblimit, q9);
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
vreinterpretq_s8_u8(q8));
q14u8 = vorrq_u8(q13u8, q14u8);
q4u16 = vmovl_u8(vget_low_u8(q10));
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
q15u8 = vandq_u8(q15u8, q9);
q1s8 = vreinterpretq_s8_u8(q1u8);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
q9 = vdupq_n_u8(4);
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
d2s8 = vqmovn_s16(q2s16);
d3s8 = vqmovn_s16(q11s16);
q1s8 = vcombine_s8(d2s8, d3s8);
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
q1s8 = vreinterpretq_s8_u8(q1u8);
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
q2s8 = vshrq_n_s8(q2s8, 3);
q1s8 = vshrq_n_s8(q1s8, 3);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
q1s8 = vrshrq_n_s8(q1s8, 1);
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
q0u8 = vdupq_n_u8(0x80);
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
return;
}
void vp8_loop_filter_horizontal_edge_y_neon(
unsigned char *src,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
src -= (pitch << 2);
q3 = vld1q_u8(src);
src += pitch;
q4 = vld1q_u8(src);
src += pitch;
q5 = vld1q_u8(src);
src += pitch;
q6 = vld1q_u8(src);
src += pitch;
q7 = vld1q_u8(src);
src += pitch;
q8 = vld1q_u8(src);
src += pitch;
q9 = vld1q_u8(src);
src += pitch;
q10 = vld1q_u8(src);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
src -= (pitch * 5);
vst1q_u8(src, q5);
src += pitch;
vst1q_u8(src, q6);
src += pitch;
vst1q_u8(src, q7);
src += pitch;
vst1q_u8(src, q8);
return;
}
void vp8_loop_filter_horizontal_edge_uv_neon(
unsigned char *u,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
u -= (pitch << 2);
v -= (pitch << 2);
d6 = vld1_u8(u);
u += pitch;
d7 = vld1_u8(v);
v += pitch;
d8 = vld1_u8(u);
u += pitch;
d9 = vld1_u8(v);
v += pitch;
d10 = vld1_u8(u);
u += pitch;
d11 = vld1_u8(v);
v += pitch;
d12 = vld1_u8(u);
u += pitch;
d13 = vld1_u8(v);
v += pitch;
d14 = vld1_u8(u);
u += pitch;
d15 = vld1_u8(v);
v += pitch;
d16 = vld1_u8(u);
u += pitch;
d17 = vld1_u8(v);
v += pitch;
d18 = vld1_u8(u);
u += pitch;
d19 = vld1_u8(v);
v += pitch;
d20 = vld1_u8(u);
d21 = vld1_u8(v);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
u -= (pitch * 5);
vst1_u8(u, vget_low_u8(q5));
u += pitch;
vst1_u8(u, vget_low_u8(q6));
u += pitch;
vst1_u8(u, vget_low_u8(q7));
u += pitch;
vst1_u8(u, vget_low_u8(q8));
v -= (pitch * 5);
vst1_u8(v, vget_high_u8(q5));
v += pitch;
vst1_u8(v, vget_high_u8(q6));
v += pitch;
vst1_u8(v, vget_high_u8(q7));
v += pitch;
vst1_u8(v, vget_high_u8(q8));
return;
}
static INLINE void write_4x8(unsigned char *dst, int pitch,
const uint8x8x4_t result) {
#ifdef VPX_INCOMPATIBLE_GCC
/*
* uint8x8x4_t result
00 01 02 03 | 04 05 06 07
10 11 12 13 | 14 15 16 17
20 21 22 23 | 24 25 26 27
30 31 32 33 | 34 35 36 37
---
* after vtrn_u16
00 01 20 21 | 04 05 24 25
02 03 22 23 | 06 07 26 27
10 11 30 31 | 14 15 34 35
12 13 32 33 | 16 17 36 37
---
* after vtrn_u8
00 10 20 30 | 04 14 24 34
01 11 21 31 | 05 15 25 35
02 12 22 32 | 06 16 26 36
03 13 23 33 | 07 17 27 37
*/
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
vreinterpret_u16_u8(result.val[2]));
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
vreinterpret_u16_u8(result.val[3]));
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
vreinterpret_u8_u16(r13_u16.val[0]));
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
vreinterpret_u8_u16(r13_u16.val[1]));
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
#else
vst4_lane_u8(dst, result, 0);
dst += pitch;
vst4_lane_u8(dst, result, 1);
dst += pitch;
vst4_lane_u8(dst, result, 2);
dst += pitch;
vst4_lane_u8(dst, result, 3);
dst += pitch;
vst4_lane_u8(dst, result, 4);
dst += pitch;
vst4_lane_u8(dst, result, 5);
dst += pitch;
vst4_lane_u8(dst, result, 6);
dst += pitch;
vst4_lane_u8(dst, result, 7);
#endif // VPX_INCOMPATIBLE_GCC
}
void vp8_loop_filter_vertical_edge_y_neon(
unsigned char *src,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
unsigned char *s, *d;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
uint8x8x4_t q4ResultH, q4ResultL;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
s = src - 4;
d6 = vld1_u8(s);
s += pitch;
d8 = vld1_u8(s);
s += pitch;
d10 = vld1_u8(s);
s += pitch;
d12 = vld1_u8(s);
s += pitch;
d14 = vld1_u8(s);
s += pitch;
d16 = vld1_u8(s);
s += pitch;
d18 = vld1_u8(s);
s += pitch;
d20 = vld1_u8(s);
s += pitch;
d7 = vld1_u8(s);
s += pitch;
d9 = vld1_u8(s);
s += pitch;
d11 = vld1_u8(s);
s += pitch;
d13 = vld1_u8(s);
s += pitch;
d15 = vld1_u8(s);
s += pitch;
d17 = vld1_u8(s);
s += pitch;
d19 = vld1_u8(s);
s += pitch;
d21 = vld1_u8(s);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
q4ResultL.val[0] = vget_low_u8(q5); // d10
q4ResultL.val[1] = vget_low_u8(q6); // d12
q4ResultL.val[2] = vget_low_u8(q7); // d14
q4ResultL.val[3] = vget_low_u8(q8); // d16
q4ResultH.val[0] = vget_high_u8(q5); // d11
q4ResultH.val[1] = vget_high_u8(q6); // d13
q4ResultH.val[2] = vget_high_u8(q7); // d15
q4ResultH.val[3] = vget_high_u8(q8); // d17
d = src - 2;
write_4x8(d, pitch, q4ResultL);
d += pitch * 8;
write_4x8(d, pitch, q4ResultH);
}
void vp8_loop_filter_vertical_edge_uv_neon(
unsigned char *u,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
unsigned char *us, *ud;
unsigned char *vs, *vd;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
uint8x8x4_t q4ResultH, q4ResultL;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
us = u - 4;
d6 = vld1_u8(us);
us += pitch;
d8 = vld1_u8(us);
us += pitch;
d10 = vld1_u8(us);
us += pitch;
d12 = vld1_u8(us);
us += pitch;
d14 = vld1_u8(us);
us += pitch;
d16 = vld1_u8(us);
us += pitch;
d18 = vld1_u8(us);
us += pitch;
d20 = vld1_u8(us);
vs = v - 4;
d7 = vld1_u8(vs);
vs += pitch;
d9 = vld1_u8(vs);
vs += pitch;
d11 = vld1_u8(vs);
vs += pitch;
d13 = vld1_u8(vs);
vs += pitch;
d15 = vld1_u8(vs);
vs += pitch;
d17 = vld1_u8(vs);
vs += pitch;
d19 = vld1_u8(vs);
vs += pitch;
d21 = vld1_u8(vs);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
q4ResultL.val[0] = vget_low_u8(q5); // d10
q4ResultL.val[1] = vget_low_u8(q6); // d12
q4ResultL.val[2] = vget_low_u8(q7); // d14
q4ResultL.val[3] = vget_low_u8(q8); // d16
ud = u - 2;
write_4x8(ud, pitch, q4ResultL);
q4ResultH.val[0] = vget_high_u8(q5); // d11
q4ResultH.val[1] = vget_high_u8(q6); // d13
q4ResultH.val[2] = vget_high_u8(q7); // d15
q4ResultH.val[3] = vget_high_u8(q8); // d17
vd = v - 2;
write_4x8(vd, pitch, q4ResultH);
}
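
Compared with the simple filter, the normal filter above adds a high-edge-variance (hev) test: |p1-p0| or |q1-q0| exceeding thresh marks a sharp feature worth preserving, so the outer pixels p1/q1 are adjusted only where hev is false, by half the rounded filter value (the vrshrq_n_s8(q1s8, 1) followed by vbicq_s8 against the hev mask). A scalar sketch of that tail step (hypothetical names):

static signed char sclamp(int t) {
    return (signed char)(t < -128 ? -128 : t > 127 ? 127 : t);
}
static void outer_taps(int filter, int hev,
                       signed char *p1, signed char *q1) {
    int f = (filter + 1) >> 1;     /* vrshrq_n_s8(filter, 1) */
    if (!hev) {                    /* vbicq_s8(filter, hev_mask) */
        *q1 = sclamp(*q1 - f);
        *p1 = sclamp(*p1 + f);
    }
}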

View File

@ -1,22 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
const unsigned char vp8_block2left[25] =
{
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
};
const unsigned char vp8_block2above[25] =
{
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
};
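
These tables map the raster index of each of a macroblock's 25 subblocks (16 Y, 4 U, 4 V, and the Y2 block at index 24) to its slot in the ENTROPY_CONTEXT_PLANES kept above and to the left of the macroblock: blocks in the same row share a left-context entry and blocks in the same column share an above-context entry. A hedged usage sketch (hypothetical helper; the real callers are the tokenizer and detokenizer):

static int combined_context(const ENTROPY_CONTEXT_PLANES *above,
                            const ENTROPY_CONTEXT_PLANES *left, int b) {
    const ENTROPY_CONTEXT *a = (const ENTROPY_CONTEXT *)above;
    const ENTROPY_CONTEXT *l = (const ENTROPY_CONTEXT *)left;
    /* same combination as VP8_COMBINEENTROPYCONTEXTS in blockd.h */
    return a[vp8_block2above[b]] + l[vp8_block2left[b]];
}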

View File

@ -1,312 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_BLOCKD_H_
#define VP8_COMMON_BLOCKD_H_
void vpx_log(const char *format, ...);
#include "vpx_config.h"
#include "vpx_scale/yv12config.h"
#include "mv.h"
#include "treecoder.h"
#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
/*#define DCPRED 1*/
#define DCPREDSIMTHRESH 0
#define DCPREDCNTTHRESH 3
#define MB_FEATURE_TREE_PROBS 3
#define MAX_MB_SEGMENTS 4
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 4
/* Segment Feature Masks */
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
typedef struct
{
int r, c;
} POS;
#define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1
#define PLANE_TYPE_UV 2
#define PLANE_TYPE_Y_WITH_DC 3
typedef char ENTROPY_CONTEXT;
typedef struct
{
ENTROPY_CONTEXT y1[4];
ENTROPY_CONTEXT u[2];
ENTROPY_CONTEXT v[2];
ENTROPY_CONTEXT y2;
} ENTROPY_CONTEXT_PLANES;
extern const unsigned char vp8_block2left[25];
extern const unsigned char vp8_block2above[25];
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
Dest = (A)+(B);
typedef enum
{
KEY_FRAME = 0,
INTER_FRAME = 1
} FRAME_TYPE;
typedef enum
{
DC_PRED, /* average of above and left pixels */
V_PRED, /* vertical prediction */
H_PRED, /* horizontal prediction */
TM_PRED, /* Truemotion prediction */
B_PRED, /* block based prediction, each block has its own prediction mode */
NEARESTMV,
NEARMV,
ZEROMV,
NEWMV,
SPLITMV,
MB_MODE_COUNT
} MB_PREDICTION_MODE;
/* Macroblock level features */
typedef enum
{
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
MB_LVL_MAX = 2 /* Number of MB level features supported */
} MB_LVL_FEATURES;
/* Segment Feature Masks */
#define SEGMENT_ALTQ 0x01
#define SEGMENT_ALT_LF 0x02
#define VP8_YMODES (B_PRED + 1)
#define VP8_UV_MODES (TM_PRED + 1)
#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
typedef enum
{
B_DC_PRED, /* average of above and left pixels */
B_TM_PRED,
B_VE_PRED, /* vertical prediction */
B_HE_PRED, /* horizontal prediction */
B_LD_PRED,
B_RD_PRED,
B_VR_PRED,
B_VL_PRED,
B_HD_PRED,
B_HU_PRED,
LEFT4X4,
ABOVE4X4,
ZERO4X4,
NEW4X4,
B_MODE_COUNT
} B_PREDICTION_MODE;
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
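/* Illustrative example of the above (indexing order is an assumption):
 * on a key frame the probabilities for a 4x4 block's mode are selected
 * as vp8_kf_bmode_prob[above_mode][left_mode]; on inter frames a single
 * vp8_bmode_prob table is used instead. */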
union b_mode_info
{
B_PREDICTION_MODE as_mode;
int_mv mv;
};
typedef enum
{
INTRA_FRAME = 0,
LAST_FRAME = 1,
GOLDEN_FRAME = 2,
ALTREF_FRAME = 3,
MAX_REF_FRAMES = 4
} MV_REFERENCE_FRAME;
typedef struct
{
uint8_t mode, uv_mode;
uint8_t ref_frame;
uint8_t is_4x4;
int_mv mv;
uint8_t partitioning;
uint8_t mb_skip_coeff; /* does this mb have coefficients at all? 1 = no coefficients, 0 = tokens need decoding */
uint8_t need_to_clamp_mvs;
uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */
} MB_MODE_INFO;
typedef struct modeinfo
{
MB_MODE_INFO mbmi;
union b_mode_info bmi[16];
} MODE_INFO;
#if CONFIG_MULTI_RES_ENCODING
/* The mb-level information that needs to be stored for the higher-resolution encoder */
typedef struct
{
MB_PREDICTION_MODE mode;
MV_REFERENCE_FRAME ref_frame;
int_mv mv;
int dissim; /* dissimilarity level of the macroblock */
} LOWER_RES_MB_INFO;
/* The frame-level information that needs to be stored for the
* higher-resolution encoder */
typedef struct
{
FRAME_TYPE frame_type;
int is_frame_dropped;
// The frame rate for the lowest resolution.
double low_res_framerate;
/* The frame number of each reference frames */
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
// The video frame counter value for the key frame, for lowest resolution.
unsigned int key_frame_counter_value;
LOWER_RES_MB_INFO *mb_info;
} LOWER_RES_FRAME_INFO;
#endif
typedef struct blockd
{
short *qcoeff;
short *dqcoeff;
unsigned char *predictor;
short *dequant;
int offset;
char *eob;
union b_mode_info bmi;
} BLOCKD;
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
typedef struct macroblockd
{
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
DECLARE_ALIGNED(16, short, dequant_y1[16]);
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
DECLARE_ALIGNED(16, short, dequant_y2[16]);
DECLARE_ALIGNED(16, short, dequant_uv[16]);
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
int fullpixel_mask;
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
YV12_BUFFER_CONFIG dst;
MODE_INFO *mode_info_context;
int mode_info_stride;
FRAME_TYPE frame_type;
int up_available;
int left_available;
unsigned char *recon_above[3];
unsigned char *recon_left[3];
int recon_left_stride[2];
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT_PLANES *left_context;
/* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
unsigned char segmentation_enabled;
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
unsigned char update_mb_segmentation_map;
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char update_mb_segmentation_data;
/* 0 (SEGMENT_DELTADATA) 1 (SEGMENT_ABSDATA): how segment feature data is interpreted. */
unsigned char mb_segement_abs_delta;
/* Per-frame flags that define which MB-level features (such as quantizer or loop filter level) */
/* are enabled and, when enabled, the probabilities used to decode the per-MB flags in MB_MODE_INFO */
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
unsigned char mode_ref_lf_delta_update;
/* Delta values have the range +/- MAX_LOOP_FILTER */
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
int mb_to_bottom_edge;
vp8_subpix_fn_t subpixel_predict;
vp8_subpix_fn_t subpixel_predict8x4;
vp8_subpix_fn_t subpixel_predict8x8;
vp8_subpix_fn_t subpixel_predict16x16;
void *current_bc;
int corrupted;
#if ARCH_X86 || ARCH_X86_64
/* This is an intermediate buffer currently used in sub-pixel motion search
* to keep a copy of the reference area. This buffer can be used for other
* purpose.
*/
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
#endif
} MACROBLOCKD;
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_BLOCKD_H_


@ -1,197 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_COEFUPDATEPROBS_H_
#define VP8_COMMON_COEFUPDATEPROBS_H_
#ifdef __cplusplus
extern "C" {
#endif
/* Update probabilities for the nodes in the token entropy tree.
Generated file included by entropy.c */
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
{
{
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
},
{
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
};
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_COEFUPDATEPROBS_H_


@ -1,48 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_COMMON_H_
#define VP8_COMMON_COMMON_H_
#include <assert.h>
/* Interface header for common constant data structures and lookup tables */
#include "vpx_mem/vpx_mem.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Only need this for fixed-size arrays, for structs just assign. */
#define vp8_copy( Dest, Src) { \
assert( sizeof( Dest) == sizeof( Src)); \
memcpy( Dest, Src, sizeof( Src)); \
}
/* Use this for variably-sized arrays. */
#define vp8_copy_array( Dest, Src, N) { \
assert( sizeof( *Dest) == sizeof( *Src)); \
memcpy( Dest, Src, N * sizeof( *Src)); \
}
#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));
#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));
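/* Usage sketch (hypothetical function name): vp8_copy requires
 * identically-sized arrays; the _array variants take an element count. */
static void copy_example(void)
{
    int src[4] = { 1, 2, 3, 4 };
    int dst[4];
    vp8_copy(dst, src);   /* whole-array copy, sizes checked by assert */
    vp8_zero(dst);        /* zero every element */
}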
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_COMMON_H_


@ -1,399 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "entropy.h"
/* *** GENERATED FILE: DO NOT EDIT *** */
#if 0
int Contexts[vp8_coef_counter_dimen];
const int default_contexts[vp8_coef_counter_dimen] =
{
{
// Block Type ( 0 )
{
// Coeff Band ( 0 )
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
{
// Coeff Band ( 1 )
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
},
{
// Coeff Band ( 2 )
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
},
{
// Coeff Band ( 3 )
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
},
{
// Coeff Band ( 4 )
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
},
{
// Coeff Band ( 5 )
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
},
{
// Coeff Band ( 6 )
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
},
{
// Coeff Band ( 7 )
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
},
},
{
// Block Type ( 1 )
{
// Coeff Band ( 0 )
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
},
{
// Coeff Band ( 1 )
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
},
{
// Coeff Band ( 2 )
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
},
{
// Coeff Band ( 3 )
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
},
{
// Coeff Band ( 4 )
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
},
{
// Coeff Band ( 5 )
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
},
{
// Coeff Band ( 6 )
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
},
{
// Coeff Band ( 7 )
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
},
},
{
// Block Type ( 2 )
{
// Coeff Band ( 0 )
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
},
{
// Coeff Band ( 1 )
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
},
{
// Coeff Band ( 2 )
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
},
{
// Coeff Band ( 3 )
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
},
{
// Coeff Band ( 4 )
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
},
{
// Coeff Band ( 5 )
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
},
{
// Coeff Band ( 6 )
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
},
{
// Coeff Band ( 7 )
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
},
{
// Block Type ( 3 )
{
// Coeff Band ( 0 )
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
},
{
// Coeff Band ( 1 )
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
},
{
// Coeff Band ( 2 )
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
},
{
// Coeff Band ( 3 )
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
},
{
// Coeff Band ( 4 )
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
},
{
// Coeff Band ( 5 )
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
},
{
// Coeff Band ( 6 )
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
},
{
// Coeff Band ( 7 )
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
},
},
};
// Update probabilities for the nodes in the token entropy tree.
const vp8_prob tree_update_probs[vp8_coef_tree_dimen] =
{
{
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
},
{
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
};
#endif


@ -1,32 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"
/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
unsigned char *dst_ptr, int dst_stride,
int height)
{
int r;
for (r = 0; r < height; r++)
{
memcpy(dst_ptr, src_ptr, 32);
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}


@ -1,155 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include "blockd.h"
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame)
{
int mb_row;
int mb_col;
int mb_index = 0;
FILE *mvs = fopen("mvs.stt", "a");
/* print out the macroblock Y modes */
mb_index = 0;
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
mb_index++;
}
fprintf(mvs, "\n");
mb_index++;
}
fprintf(mvs, "\n");
mb_index = 0;
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
mb_index++;
}
fprintf(mvs, "\n");
mb_index++;
}
fprintf(mvs, "\n");
/* print out the macroblock UV modes */
mb_index = 0;
fprintf(mvs, "UV Modes for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
mb_index++;
}
mb_index++;
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
/* print out the block modes */
fprintf(mvs, "Mbs for Frame %d\n", frame);
{
int b_row;
for (b_row = 0; b_row < 4 * rows; b_row++)
{
int b_col;
int bindex;
for (b_col = 0; b_col < 4 * cols; b_col++)
{
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
if (mi[mb_index].mbmi.mode == B_PRED)
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
else
fprintf(mvs, "xx ");
}
fprintf(mvs, "\n");
}
}
fprintf(mvs, "\n");
/* print out the macroblock mvs */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2);
mb_index++;
}
mb_index++;
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
/* print out the block modes */
fprintf(mvs, "MVs for Frame %d\n", frame);
{
int b_row;
for (b_row = 0; b_row < 4 * rows; b_row++)
{
int b_col;
int bindex;
for (b_col = 0; b_col < 4 * cols; b_col++)
{
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col);
}
fprintf(mvs, "\n");
}
}
fprintf(mvs, "\n");
fclose(mvs);
}


@ -1,200 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
#define VP8_COMMON_DEFAULT_COEF_PROBS_H_
#ifdef __cplusplus
extern "C" {
#endif
/*Generated file, included by entropy.c*/
static const vp8_prob default_coef_probs [BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] =
{
{ /* Block Type ( 0 ) */
{ /* Coeff Band ( 0 )*/
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 1 )*/
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
},
{ /* Coeff Band ( 2 )*/
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
},
{ /* Coeff Band ( 3 )*/
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 4 )*/
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
},
{ /* Coeff Band ( 5 )*/
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 6 )*/
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 7 )*/
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
}
},
{ /* Block Type ( 1 ) */
{ /* Coeff Band ( 0 )*/
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
},
{ /* Coeff Band ( 1 )*/
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
},
{ /* Coeff Band ( 2 )*/
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
},
{ /* Coeff Band ( 3 )*/
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
},
{ /* Coeff Band ( 4 )*/
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
},
{ /* Coeff Band ( 5 )*/
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
},
{ /* Coeff Band ( 6 )*/
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
},
{ /* Coeff Band ( 7 )*/
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
}
},
{ /* Block Type ( 2 ) */
{ /* Coeff Band ( 0 )*/
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
},
{ /* Coeff Band ( 1 )*/
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
},
{ /* Coeff Band ( 2 )*/
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
},
{ /* Coeff Band ( 3 )*/
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 4 )*/
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 5 )*/
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 6 )*/
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ /* Coeff Band ( 7 )*/
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
}
},
{ /* Block Type ( 3 ) */
{ /* Coeff Band ( 0 )*/
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
},
{ /* Coeff Band ( 1 )*/
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
},
{ /* Coeff Band ( 2 )*/
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
},
{ /* Coeff Band ( 3 )*/
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
},
{ /* Coeff Band ( 4 )*/
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
},
{ /* Coeff Band ( 5 )*/
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
},
{ /* Coeff Band ( 6 )*/
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
},
{ /* Coeff Band ( 7 )*/
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
}
}
};
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_


@ -1,43 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
{
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
for (i = 0; i < 16; i++)
{
DQ[i] = Q[i] * DQC[i];
}
}
void vp8_dequant_idct_add_c(short *input, short *dq,
unsigned char *dest, int stride)
{
int i;
for (i = 0; i < 16; i++)
{
input[i] = dq[i] * input[i];
}
vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
memset(input, 0, 32);
}


@ -1,188 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "entropy.h"
#include "blockd.h"
#include "onyxc_int.h"
#include "vpx_mem/vpx_mem.h"
#include "coefupdateprobs.h"
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
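/* For reference (an observation about the table above): vp8_norm[v] is
 * the number of leading zero bits in the byte v, i.e. the left shift
 * that renormalizes the bool decoder's range so that (v << vp8_norm[v])
 * has bit 7 set for any v >= 1. */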
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) =
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
DECLARE_ALIGNED(16, const unsigned char,
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
{
0, 1, 4, 8,
5, 2, 3, 6,
9, 12, 13, 10,
7, 11, 14, 15,
};
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
{
1, 2, 6, 7,
3, 5, 8, 13,
4, 9, 12, 14,
10, 11, 15, 16
};
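/* Illustrative check (hypothetical helper): the inverse table is the
 * 1-based inverse permutation of the forward scan, i.e.
 * vp8_default_inv_zig_zag[vp8_default_zig_zag1d[i]] == i + 1. */
static int zig_zag_tables_consistent(void)
{
    int i;
    for (i = 0; i < 16; ++i)
        if (vp8_default_inv_zig_zag[vp8_default_zig_zag1d[i]] != i + 1)
            return 0;
    return 1;
}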
/* vp8_default_zig_zag_mask generated with:
void vp8_init_scan_order_mask()
{
int i;
for (i = 0; i < 16; i++)
{
vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i;
}
}
*/
DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) =
{
1, 2, 32, 64,
4, 16, 128, 4096,
8, 256, 2048, 8192,
512, 1024, 16384, -32768
};
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
{
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
-ONE_TOKEN, 6, /* 2 = ONE */
8, 12, /* 3 = LOW_VAL */
-TWO_TOKEN, 10, /* 4 = TWO */
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
14, 16, /* 6 = HIGH_LOW */
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
18, 20, /* 8 = CAT_THREEFOUR */
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
};
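/* Minimal traversal sketch (hypothetical helper; read_bit stands in for
 * the project's arithmetic bool decoder, it is not the real treereader
 * API). Positive entries are internal-node indices, negated entries are
 * leaf token values, and probs[i >> 1] is the probability at node i. */
static int read_token_sketch(const vp8_tree_index *tree,
                             const vp8_prob *probs,
                             int (*read_bit)(vp8_prob p))
{
    vp8_tree_index i = 0;
    while ((i = tree[i + read_bit(probs[i >> 1])]) > 0)
        continue;
    return -i;
}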
/* vp8_coef_encodings generated with:
vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
*/
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
{
{2, 2},
{6, 3},
{28, 5},
{58, 6},
{59, 6},
{60, 6},
{61, 6},
{124, 7},
{125, 7},
{126, 7},
{127, 7},
{0, 1}
};
/* Trees for extra bits. Probabilities are constant and
do not depend on previously encoded bits */
static const vp8_prob Pcat1[] = { 159};
static const vp8_prob Pcat2[] = { 165, 145};
static const vp8_prob Pcat3[] = { 173, 148, 140};
static const vp8_prob Pcat4[] = { 176, 155, 140, 135};
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130};
static const vp8_prob Pcat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
/* tree index tables generated with:
void init_bit_tree(vp8_tree_index *p, int n)
{
int i = 0;
while (++i < n)
{
p[0] = p[1] = i << 1;
p += 2;
}
p[0] = p[1] = 0;
}
void init_bit_trees()
{
init_bit_tree(cat1, 1);
init_bit_tree(cat2, 2);
init_bit_tree(cat3, 3);
init_bit_tree(cat4, 4);
init_bit_tree(cat5, 5);
init_bit_tree(cat6, 11);
}
*/
static const vp8_tree_index cat1[2] = { 0, 0 };
static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 };
static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
14, 14, 16, 16, 18, 18, 20, 20, 0, 0 };
const vp8_extra_bit_struct vp8_extra_bits[12] =
{
{ 0, 0, 0, 0},
{ 0, 0, 0, 1},
{ 0, 0, 0, 2},
{ 0, 0, 0, 3},
{ 0, 0, 0, 4},
{ cat1, Pcat1, 1, 5},
{ cat2, Pcat2, 2, 7},
{ cat3, Pcat3, 3, 11},
{ cat4, Pcat4, 4, 19},
{ cat5, Pcat5, 5, 35},
{ cat6, Pcat6, 11, 67},
{ 0, 0, 0, 0}
};
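/* Hedged sketch (hypothetical helper; read_bit again stands in for the
 * bool decoder): a category token's value is base_val plus Len extra
 * bits, read MSB-first with the Pcat probabilities. */
static int token_value_sketch(const vp8_extra_bit_struct *e,
                              int (*read_bit)(vp8_prob p))
{
    int i, v = 0;
    for (i = 0; i < e->Len; ++i)
        v = (v << 1) | read_bit(e->prob[i]);
    return e->base_val + v;   /* e.g. category 1: 5 + one bit = 5 or 6 */
}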
#include "default_coef_probs.h"
void vp8_default_coef_probs(VP8_COMMON *pc)
{
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
}


@ -1,109 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ENTROPY_H_
#define VP8_COMMON_ENTROPY_H_
#include "treecoder.h"
#include "blockd.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Coefficient token alphabet */
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define MAX_ENTROPY_TOKENS 12
#define ENTROPY_NODES 11
extern const vp8_tree_index vp8_coef_tree[];
extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
typedef struct
{
vp8_tree_p tree;
const vp8_prob *prob;
int Len;
int base_val;
} vp8_extra_bit_struct;
extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
#define PROB_UPDATE_BASELINE_COST 7
#define MAX_PROB 255
#define DCT_MAX_VALUE 2048
/* Coefficients are predicted via a 3-dimensional probability table. */
/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
#define BLOCK_TYPES 4
/* Middle dimension is a coarsening of the coefficient's
position within the 4x4 DCT. */
#define COEF_BANDS 8
extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
/* Inside dimension is 3-valued measure of nearby complexity, that is,
the extent to which nearby coefficients are nonzero. For the first
coefficient (DC, unless block type is 0), we look at the (already encoded)
blocks above and to the left of the current block. The context index is
then the number (0,1,or 2) of these blocks having nonzero coefficients.
After decoding a coefficient, the measure is roughly the size of the
most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1).
Note that the intuitive meaning of this measure changes as coefficients
are decoded, e.g., prior to the first token, a zero means that my neighbors
are empty while, after the first token, because of the use of end-of-block,
a zero means we just decoded a zero and hence guarantees that a non-zero
coefficient will appear later in this block. However, this shift
in meaning is perfectly OK because our context depends also on the
coefficient band (and since zigzag positions 0, 1, and 2 are in
distinct bands). */
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
# define PREV_COEF_CONTEXTS 3
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
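/* Illustrative sketch of the 3-valued context described above
 * (hypothetical helper): for the first coefficient, count nonzero
 * neighboring blocks; after each decoded token the context becomes
 * vp8_prev_token_class[token] (0 for zero, 1 for one, 2 for larger). */
static int first_coef_context(ENTROPY_CONTEXT left, ENTROPY_CONTEXT above)
{
    return (left != 0) + (above != 0);
}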
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]);
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
void vp8_coef_tree_initialize(void);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_ENTROPY_H_


@ -1,171 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define USE_PREBUILT_TABLES
#include "entropymode.h"
#include "entropy.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8_entropymodedata.h"
int vp8_mv_cont(const int_mv *l, const int_mv *a)
{
int lez = (l->as_int == 0);
int aez = (a->as_int == 0);
int lea = (l->as_int == a->as_int);
if (lea && lez)
return SUBMVREF_LEFT_ABOVE_ZED;
if (lea)
return SUBMVREF_LEFT_ABOVE_SAME;
if (aez)
return SUBMVREF_ABOVE_ZED;
if (lez)
return SUBMVREF_LEFT_ZED;
return SUBMVREF_NORMAL;
}
static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25};
const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] =
{
{ 147, 136, 18 },
{ 106, 145, 1 },
{ 179, 121, 1 },
{ 223, 1 , 34 },
{ 208, 1 , 1 }
};
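/* Usage sketch (hypothetical helper): vp8_mv_cont above selects which row
 * of this table conditions the sub-MV reference probabilities. */
static const vp8_prob *sub_mv_ref_probs_sketch(const int_mv *left,
                                               const int_mv *above)
{
    return vp8_sub_mv_ref_prob2[vp8_mv_cont(left, above)];
}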
const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] =
{
{
0, 0, 0, 0,
0, 0, 0, 0,
1, 1, 1, 1,
1, 1, 1, 1,
},
{
0, 0, 1, 1,
0, 0, 1, 1,
0, 0, 1, 1,
0, 0, 1, 1,
},
{
0, 0, 1, 1,
0, 0, 1, 1,
2, 2, 3, 3,
2, 2, 3, 3,
},
{
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
}
};
const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16};
const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150};
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
{
-B_DC_PRED, 2, /* 0 = DC_NODE */
-B_TM_PRED, 4, /* 1 = TM_NODE */
-B_VE_PRED, 6, /* 2 = VE_NODE */
8, 12, /* 3 = COM_NODE */
-B_HE_PRED, 10, /* 4 = HE_NODE */
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
-B_LD_PRED, 14, /* 6 = LD_NODE */
-B_VL_PRED, 16, /* 7 = VL_NODE */
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
};
/* Again, these trees use the same probability indices as their
explicitly-programmed predecessors. */
const vp8_tree_index vp8_ymode_tree[8] =
{
-DC_PRED, 2,
4, 6,
-V_PRED, -H_PRED,
-TM_PRED, -B_PRED
};
const vp8_tree_index vp8_kf_ymode_tree[8] =
{
-B_PRED, 2,
4, 6,
-DC_PRED, -V_PRED,
-H_PRED, -TM_PRED
};
const vp8_tree_index vp8_uv_mode_tree[6] =
{
-DC_PRED, 2,
-V_PRED, 4,
-H_PRED, -TM_PRED
};
const vp8_tree_index vp8_mbsplit_tree[6] =
{
-3, 2,
-2, 4,
-0, -1
};
const vp8_tree_index vp8_mv_ref_tree[8] =
{
-ZEROMV, 2,
-NEARESTMV, 4,
-NEARMV, 6,
-NEWMV, -SPLITMV
};
const vp8_tree_index vp8_sub_mv_ref_tree[6] =
{
-LEFT4X4, 2,
-ABOVE4X4, 4,
-ZERO4X4, -NEW4X4
};
const vp8_tree_index vp8_small_mvtree [14] =
{
2, 8,
4, 6,
-0, -1,
-2, -3,
10, 12,
-4, -5,
-6, -7
};
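/* Illustrative sketch (hypothetical helper, not the project's real
 * reader; the probability layout is an assumption): a short MV magnitude
 * (0..7) is decoded by walking vp8_small_mvtree with the seven
 * probabilities stored at offset MVPshort in an MV_CONTEXT. */
static int read_short_mv_sketch(const vp8_prob *short_probs,
                                int (*read_bit)(vp8_prob p))
{
    vp8_tree_index i = 0;
    while ((i = vp8_small_mvtree[i + read_bit(short_probs[i >> 1])]) > 0)
        continue;
    return -i;
}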
void vp8_init_mbmode_probs(VP8_COMMON *x)
{
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
}
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
{
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
}

Просмотреть файл

@ -1,88 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ENTROPYMODE_H_
#define VP8_COMMON_ENTROPYMODE_H_
#include "onyxc_int.h"
#include "treecoder.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum
{
SUBMVREF_NORMAL,
SUBMVREF_LEFT_ZED,
SUBMVREF_ABOVE_ZED,
SUBMVREF_LEFT_ABOVE_SAME,
SUBMVREF_LEFT_ABOVE_ZED
} sumvfref_t;
typedef int vp8_mbsplit[16];
#define VP8_NUMMBSPLITS 4
extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
#define SUBMVREF_COUNT 5
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];
extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];
extern const vp8_tree_index vp8_bmode_tree[];
extern const vp8_tree_index vp8_ymode_tree[];
extern const vp8_tree_index vp8_kf_ymode_tree[];
extern const vp8_tree_index vp8_uv_mode_tree[];
extern const vp8_tree_index vp8_mbsplit_tree[];
extern const vp8_tree_index vp8_mv_ref_tree[];
extern const vp8_tree_index vp8_sub_mv_ref_tree[];
extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES];
extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES];
extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES];
extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES];
extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS];
/* Inter mode values do not start at zero */
extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS];
extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
extern const vp8_tree_index vp8_small_mvtree[];
extern const struct vp8_token_struct vp8_small_mvencodings[8];
/* Key frame default mode probs */
extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
[VP8_BINTRAMODES-1];
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];
void vp8_init_mbmode_probs(VP8_COMMON *x);
void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_ENTROPYMODE_H_


@ -1,49 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "entropymv.h"
const MV_CONTEXT vp8_mv_update_probs[2] =
{
{{
237,
246,
253, 253, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 250, 250, 252, 254, 254
}},
{{
231,
243,
245, 253, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 251, 251, 254, 254, 254
}}
};
const MV_CONTEXT vp8_default_mv_context[2] =
{
{{
/* row */
162, /* is short */
128, /* sign */
225, 146, 172, 147, 214, 39, 156, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
}},
{{
/* same for column */
164, /* is short */
128,
204, 170, 119, 235, 140, 230, 228,
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
}}
};

Просмотреть файл

@ -1,52 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ENTROPYMV_H_
#define VP8_COMMON_ENTROPYMV_H_
#include "treecoder.h"
#ifdef __cplusplus
extern "C" {
#endif
enum
{
mv_max = 1023, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values of a MV component */
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
mvnum_short = 8, /* magnitudes 0 through 7 */
/* probability offsets for coding each MV component */
mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
MVPsign, /* sign for non-zero */
MVPshort, /* 8 short values = 7-position tree */
MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
};
typedef struct mv_context
{
vp8_prob prob[MVPcount]; /* often come in row, col pairs */
} MV_CONTEXT;
extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_ENTROPYMV_H_


@ -1,188 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "extend.h"
#include "vpx_mem/vpx_mem.h"
static void copy_and_extend_plane
(
unsigned char *s, /* source */
int sp, /* source pitch */
unsigned char *d, /* destination */
int dp, /* destination pitch */
int h, /* height */
int w, /* width */
int et, /* extend top border */
int el, /* extend left border */
int eb, /* extend bottom border */
int er /* extend right border */
)
{
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
int linesize;
/* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
dest_ptr1 = d - el;
dest_ptr2 = d + w;
for (i = 0; i < h; i++)
{
memset(dest_ptr1, src_ptr1[0], el);
memcpy(dest_ptr1 + el, src_ptr1, w);
memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
dest_ptr1 += dp;
dest_ptr2 += dp;
}
/* Now copy the top and bottom lines into each line of the respective
* borders
*/
src_ptr1 = d - el;
src_ptr2 = d + dp * (h - 1) - el;
dest_ptr1 = d + dp * (-et) - el;
dest_ptr2 = d + dp * (h) - el;
linesize = el + er + w;
for (i = 0; i < et; i++)
{
memcpy(dest_ptr1, src_ptr1, linesize);
dest_ptr1 += dp;
}
for (i = 0; i < eb; i++)
{
memcpy(dest_ptr2, src_ptr2, linesize);
dest_ptr2 += dp;
}
}
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst)
{
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_height, src->y_width,
et, el, eb, er);
et = dst->border >> 1;
el = dst->border >> 1;
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
}
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw)
{
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
int src_y_offset = srcy * src->y_stride + srcx;
int dst_y_offset = srcy * dst->y_stride + srcx;
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
/* If the side is not touching the boundary then don't extend. */
if (srcy)
et = 0;
if (srcx)
el = 0;
if (srcy + srch != src->y_height)
eb = 0;
if (srcx + srcw != src->y_width)
er = 0;
copy_and_extend_plane(src->y_buffer + src_y_offset,
src->y_stride,
dst->y_buffer + dst_y_offset,
dst->y_stride,
srch, srcw,
et, el, eb, er);
et = (et + 1) >> 1;
el = (el + 1) >> 1;
eb = (eb + 1) >> 1;
er = (er + 1) >> 1;
srch = (srch + 1) >> 1;
srcw = (srcw + 1) >> 1;
copy_and_extend_plane(src->u_buffer + src_uv_offset,
src->uv_stride,
dst->u_buffer + dst_uv_offset,
dst->uv_stride,
srch, srcw,
et, el, eb, er);
copy_and_extend_plane(src->v_buffer + src_uv_offset,
src->uv_stride,
dst->v_buffer + dst_uv_offset,
dst->uv_stride,
srch, srcw,
et, el, eb, er);
}
/* note the extension is only for the last row, for intra prediction purpose */
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
unsigned char *YPtr,
unsigned char *UPtr,
unsigned char *VPtr)
{
int i;
YPtr += ybf->y_stride * 14;
UPtr += ybf->uv_stride * 6;
VPtr += ybf->uv_stride * 6;
for (i = 0; i < 4; i++)
{
YPtr[i] = YPtr[-1];
UPtr[i] = UPtr[-1];
VPtr[i] = VPtr[-1];
}
YPtr += ybf->y_stride;
UPtr += ybf->uv_stride;
VPtr += ybf->uv_stride;
for (i = 0; i < 4; i++)
{
YPtr[i] = YPtr[-1];
UPtr[i] = UPtr[-1];
VPtr[i] = VPtr[-1];
}
}

View file

@ -1,33 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_EXTEND_H_
#define VP8_COMMON_EXTEND_H_
#include "vpx_scale/yv12config.h"
#ifdef __cplusplus
extern "C" {
#endif
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_EXTEND_H_

View file

@ -1,493 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "filter.h"
#include "./vp8_rtcd.h"
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
{ 128, 0 },
{ 112, 16 },
{ 96, 32 },
{ 80, 48 },
{ 64, 64 },
{ 48, 80 },
{ 32, 96 },
{ 16, 112 }
};
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
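/* Each row above is selected by the 1/8-pel offset (0..7) of the prediction,
 * and every row sums to VP8_FILTER_WEIGHT (128) so filtering preserves DC.
 * A minimal sanity-check sketch (hypothetical helper, needs <assert.h>):
 */
#include <assert.h>
static void check_filter_rows(void)
{
    int i, j;
    for (i = 0; i < 8; i++)
    {
        int bil = 0, sub = 0;
        for (j = 0; j < 2; j++) bil += vp8_bilinear_filters[i][j];
        for (j = 0; j < 6; j++) sub += vp8_sub_pel_filters[i][j];
        assert(bil == VP8_FILTER_WEIGHT && sub == VP8_FILTER_WEIGHT);
    }
}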
static void filter_block2d_first_pass
(
unsigned char *src_ptr,
int *output_ptr,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
for (i = 0; i < output_height; i++)
{
for (j = 0; j < output_width; j++)
{
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
Temp = 0;
else if (Temp > 255)
Temp = 255;
output_ptr[j] = Temp;
src_ptr++;
}
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
}
static void filter_block2d_second_pass
(
int *src_ptr,
unsigned char *output_ptr,
int output_pitch,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
for (i = 0; i < output_height; i++)
{
for (j = 0; j < output_width; j++)
{
/* Apply filter */
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
Temp = 0;
else if (Temp > 255)
Temp = 255;
output_ptr[j] = (unsigned char)Temp;
src_ptr++;
}
/* Start next row */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
}
}
static void filter_block2d
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
int output_pitch,
const short *HFilter,
const short *VFilter
)
{
int FData[9*4]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
/* then filter vertically... */
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
void vp8_sixtap_predict4x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
}
void vp8_sixtap_predict8x8_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
/* then filter vertically... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
void vp8_sixtap_predict8x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
/* then filter vertically... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
}
void vp8_sixtap_predict16x16_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
int FData[21*24]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
/* then filter vertically... */
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
}
/****************************************************************************
*
* ROUTINE : filter_block2d_bil_first_pass
*
* INPUTS : UINT8 *src_ptr : Pointer to source block.
* UINT32 src_stride : Stride of source block.
* UINT32 height : Block height.
* UINT32 width : Block width.
* INT16 *vp8_filter : Array of 2 bi-linear filter taps.
*
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
* in the horizontal direction to produce the filtered output
* block. Used to implement first-pass of 2-D separable filter.
*
* SPECIAL NOTES : Produces UINT16 output to retain precision for the next pass.
* Two filter taps should sum to VP8_FILTER_WEIGHT.
*
****************************************************************************/
static void filter_block2d_bil_first_pass
(
unsigned char *src_ptr,
unsigned short *dst_ptr,
unsigned int src_stride,
unsigned int height,
unsigned int width,
const short *vp8_filter
)
{
unsigned int i, j;
for (i = 0; i < height; i++)
{
for (j = 0; j < width; j++)
{
/* Apply bilinear filter */
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[1] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
src_ptr++;
}
/* Next row... */
src_ptr += src_stride - width;
dst_ptr += width;
}
}
/****************************************************************************
*
* ROUTINE : filter_block2d_bil_second_pass
*
* INPUTS : UINT16 *src_ptr : Pointer to source block.
* UINT32 dst_pitch : Destination block pitch.
* UINT32 height : Block height.
* UINT32 width : Block width.
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
*
* OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
* in the vertical direction to produce the filtered output
* block. Used to implement second-pass of 2-D separable filter.
*
* SPECIAL NOTES : Requires 16-bit input as produced by filter_block2d_bil_first_pass.
* Two filter taps should sum to VP8_FILTER_WEIGHT.
*
****************************************************************************/
static void filter_block2d_bil_second_pass
(
unsigned short *src_ptr,
unsigned char *dst_ptr,
int dst_pitch,
unsigned int height,
unsigned int width,
const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
for (i = 0; i < height; i++)
{
for (j = 0; j < width; j++)
{
/* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[width] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2);
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
/* Next row... */
dst_ptr += dst_pitch;
}
}
/****************************************************************************
*
* ROUTINE : filter_block2d_bil
*
* INPUTS : UINT8 *src_ptr : Pointer to source block.
* UINT32 src_pitch : Stride of source block.
* UINT32 dst_pitch : Stride of destination block.
* INT32 *HFilter : Array of 2 horizontal filter taps.
* INT32 *VFilter : Array of 2 vertical filter taps.
* INT32 Width : Block width
* INT32 Height : Block height
*
* OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
* FUNCTION : 2-D filters an input block by applying a 2-tap
* bi-linear filter horizontally followed by a 2-tap
* bi-linear filter vertically on the result.
*
* SPECIAL NOTES : The largest block size that can be handled here is 16x16.
*
****************************************************************************/
static void filter_block2d_bil
(
unsigned char *src_ptr,
unsigned char *dst_ptr,
unsigned int src_pitch,
unsigned int dst_pitch,
const short *HFilter,
const short *VFilter,
int Width,
int Height
)
{
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
/* then 1-D vertically... */
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
void vp8_bilinear_predict4x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
#if 0
{
int i;
unsigned char temp1[16];
unsigned char temp2[16];
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
for (i = 0; i < 16; i++)
{
if (temp1[i] != temp2[i])
{
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
}
}
}
#endif
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
}
void vp8_bilinear_predict8x8_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
}
void vp8_bilinear_predict8x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
}
void vp8_bilinear_predict16x16_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
}

View file

@ -1,32 +0,0 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_FILTER_H_
#define VP8_COMMON_FILTER_H_
#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_FILTER_H_

View file

@ -1,193 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "findnearmv.h"
const unsigned char vp8_mbsplit_offset[4][16] = {
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
};
/* Predict motion vectors using those from already-decoded nearby blocks.
Note that we only consider one 4x4 subblock from each candidate 16x16
macroblock. */
void vp8_find_near_mvs
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv *nearest,
int_mv *nearby,
int_mv *best_mv,
int cnt[4],
int refframe,
int *ref_frame_sign_bias
)
{
const MODE_INFO *above = here - xd->mode_info_stride;
const MODE_INFO *left = here - 1;
const MODE_INFO *aboveleft = above - 1;
int_mv near_mvs[4];
int_mv *mv = near_mvs;
int *cntx = cnt;
enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
/* Zero accumulators */
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
/* Process above */
if (above->mbmi.ref_frame != INTRA_FRAME)
{
if (above->mbmi.mv.as_int)
{
(++mv)->as_int = above->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
++cntx;
}
*cntx += 2;
}
/* Process left */
if (left->mbmi.ref_frame != INTRA_FRAME)
{
if (left->mbmi.mv.as_int)
{
int_mv this_mv;
this_mv.as_int = left->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
if (this_mv.as_int != mv->as_int)
{
(++mv)->as_int = this_mv.as_int;
++cntx;
}
*cntx += 2;
}
else
cnt[CNT_INTRA] += 2;
}
/* Process above left */
if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
{
if (aboveleft->mbmi.mv.as_int)
{
int_mv this_mv;
this_mv.as_int = aboveleft->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
if (this_mv.as_int != mv->as_int)
{
(++mv)->as_int = this_mv.as_int;
++cntx;
}
*cntx += 1;
}
else
cnt[CNT_INTRA] += 1;
}
/* If we have three distinct MVs ... */
if (cnt[CNT_SPLITMV])
{
/* See if above-left MV can be merged with NEAREST */
if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
cnt[CNT_NEAREST] += 1;
}
cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
+ (left->mbmi.mode == SPLITMV)) * 2
+ (aboveleft->mbmi.mode == SPLITMV);
/* Swap near and nearest if necessary */
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
{
int tmp;
tmp = cnt[CNT_NEAREST];
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
cnt[CNT_NEAR] = tmp;
tmp = near_mvs[CNT_NEAREST].as_int;
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
near_mvs[CNT_NEAR].as_int = tmp;
}
/* Use near_mvs[0] to store the "best" MV */
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
/* Set up return values */
best_mv->as_int = near_mvs[0].as_int;
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
nearby->as_int = near_mvs[CNT_NEAR].as_int;
}
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd)
{
inv->as_mv.row = src->as_mv.row * -1;
inv->as_mv.col = src->as_mv.col * -1;
vp8_clamp_mv2(inv, xd);
vp8_clamp_mv2(src, xd);
}
int vp8_find_near_mvs_bias
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv mode_mv_sb[2][MB_MODE_COUNT],
int_mv best_mv_sb[2],
int cnt[4],
int refframe,
int *ref_frame_sign_bias
)
{
int sign_bias = ref_frame_sign_bias[refframe];
vp8_find_near_mvs(xd,
here,
&mode_mv_sb[sign_bias][NEARESTMV],
&mode_mv_sb[sign_bias][NEARMV],
&best_mv_sb[sign_bias],
cnt,
refframe,
ref_frame_sign_bias);
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
&mode_mv_sb[sign_bias][NEARESTMV], xd);
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
&mode_mv_sb[sign_bias][NEARMV], xd);
invert_and_clamp_mvs(&best_mv_sb[!sign_bias],
&best_mv_sb[sign_bias], xd);
return sign_bias;
}
vp8_prob *vp8_mv_ref_probs(
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
)
{
p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
return p;
}

View file

@ -1,194 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_FINDNEARMV_H_
#define VP8_COMMON_FINDNEARMV_H_
#include "./vpx_config.h"
#include "mv.h"
#include "blockd.h"
#include "modecont.h"
#include "treecoder.h"
#ifdef __cplusplus
extern "C" {
#endif
static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
int_mv *mvp, const int *ref_frame_sign_bias)
{
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
{
mvp->as_mv.row *= -1;
mvp->as_mv.col *= -1;
}
}
#define LEFT_TOP_MARGIN (16 << 3)
#define RIGHT_BOTTOM_MARGIN (16 << 3)
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
{
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
}
static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
int mb_to_right_edge, int mb_to_top_edge,
int mb_to_bottom_edge)
{
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
mb_to_left_edge : mv->as_mv.col;
mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
mb_to_right_edge : mv->as_mv.col;
mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
mb_to_top_edge : mv->as_mv.row;
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
mb_to_bottom_edge : mv->as_mv.row;
}
static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
int mb_to_right_edge,
int mb_to_top_edge,
int mb_to_bottom_edge)
{
unsigned int need_to_clamp;
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
return need_to_clamp;
}
void vp8_find_near_mvs
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv *nearest, int_mv *nearby, int_mv *best,
int near_mv_ref_cts[4],
int refframe,
int *ref_frame_sign_bias
);
int vp8_find_near_mvs_bias
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv mode_mv_sb[2][MB_MODE_COUNT],
int_mv best_mv_sb[2],
int cnt[4],
int refframe,
int *ref_frame_sign_bias
);
vp8_prob *vp8_mv_ref_probs(
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
);
extern const unsigned char vp8_mbsplit_offset[4][16];
static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
/* On L edge, get from MB to left of us */
--cur_mb;
if(cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv.as_int;
b += 4;
}
return (cur_mb->bmi + b - 1)->mv.as_int;
}
static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
{
if (!(b >> 2))
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
if(cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv.as_int;
b += 16;
}
return (cur_mb->bmi + (b - 4))->mv.as_int;
}
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
/* On L edge, get from MB to left of us */
--cur_mb;
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 3)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 1)->as_mode;
}
static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
int mi_stride)
{
if (!(b >> 2))
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 12)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 4)->as_mode;
}
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_FINDNEARMV_H_

View file

@ -1,104 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#if ARCH_ARM
#include "vpx_ports/arm.h"
#elif ARCH_X86 || ARCH_X86_64
#include "vpx_ports/x86.h"
#endif
#include "vp8/common/onyxc_int.h"
#include "vp8/common/systemdependent.h"
#if CONFIG_MULTITHREAD
#if HAVE_UNISTD_H && !defined(__OS2__)
#include <unistd.h>
#elif defined(_WIN32)
#include <windows.h>
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
#elif defined(__OS2__)
#define INCL_DOS
#define INCL_DOSSPINLOCK
#include <os2.h>
#endif
#endif
#if CONFIG_MULTITHREAD
static int get_cpu_count()
{
int core_count = 16;
#if HAVE_UNISTD_H && !defined(__OS2__)
#if defined(_SC_NPROCESSORS_ONLN)
core_count = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
core_count = sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
{
#if _WIN32_WINNT >= 0x0501
SYSTEM_INFO sysinfo;
GetNativeSystemInfo(&sysinfo);
#else
PGNSI pGNSI;
SYSTEM_INFO sysinfo;
/* Call GetNativeSystemInfo if supported or
* GetSystemInfo otherwise. */
pGNSI = (PGNSI) GetProcAddress(
GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
if (pGNSI != NULL)
pGNSI(&sysinfo);
else
GetSystemInfo(&sysinfo);
#endif
core_count = sysinfo.dwNumberOfProcessors;
}
#elif defined(__OS2__)
{
ULONG proc_id;
ULONG status;
core_count = 0;
for (proc_id = 1; ; proc_id++)
{
if (DosGetProcessorStatus(proc_id, &status))
break;
if (status == PROC_ONLINE)
core_count++;
}
}
#else
/* other platforms */
#endif
return core_count > 0 ? core_count : 1;
}
#endif
void vp8_clear_system_state_c(void) {}
void vp8_machine_specific_config(VP8_COMMON *ctx)
{
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
#endif /* CONFIG_MULTITHREAD */
#if ARCH_ARM
ctx->cpu_caps = arm_cpu_caps();
#elif ARCH_X86 || ARCH_X86_64
ctx->cpu_caps = x86_simd_caps();
#endif
}

View file

@ -1,51 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_HEADER_H_
#define VP8_COMMON_HEADER_H_
#ifdef __cplusplus
extern "C" {
#endif
/* 24 bits total */
typedef struct
{
unsigned int type: 1;
unsigned int version: 3;
unsigned int show_frame: 1;
/* Allow 2^20 bytes = 8 megabits for first partition */
unsigned int first_partition_length_in_bytes: 19;
#ifdef PACKET_TESTING
unsigned int frame_number;
unsigned int update_gold: 1;
unsigned int uses_gold: 1;
unsigned int update_last: 1;
unsigned int uses_last: 1;
#endif
} VP8_HEADER;
#ifdef PACKET_TESTING
#define VP8_HEADER_SIZE 8
#else
#define VP8_HEADER_SIZE 3
#endif
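/* Decoding sketch for the 3-byte frame tag described by the bitfields above
 * (hypothetical helper; assumes the little-endian, LSB-first packing used by
 * the VP8 bitstream):
 */
static VP8_HEADER vp8_parse_frame_tag(const unsigned char *data)
{
    unsigned int raw = data[0] | (data[1] << 8) | ((unsigned int)data[2] << 16);
    VP8_HEADER hdr;
    hdr.type = raw & 1; /* 0 = key frame, 1 = inter frame */
    hdr.version = (raw >> 1) & 7;
    hdr.show_frame = (raw >> 4) & 1;
    hdr.first_partition_length_in_bytes = (raw >> 5) & 0x7FFFF; /* 19 bits */
    return hdr;
}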
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_HEADER_H_

View file

@ -1,90 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
void vp8_dequant_idct_add_c(short *input, short *dq,
unsigned char *dest, int stride);
void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
int pred_stride, unsigned char *dst_ptr,
int dst_stride);
void vp8_dequant_idct_add_y_block_c
(short *q, short *dq,
unsigned char *dst, int stride, char *eobs)
{
int i, j;
for (i = 0; i < 4; i++)
{
for (j = 0; j < 4; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_c (q, dq, dst, stride);
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
dst += 4;
}
dst += 4*stride - 16;
}
}
void vp8_dequant_idct_add_uv_block_c
(short *q, short *dq,
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
{
int i, j;
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_c (q, dq, dstu, stride);
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
dstu += 4;
}
dstu += 4*stride - 8;
}
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_c (q, dq, dstv, stride);
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
dstv += 4;
}
dstv += 4*stride - 8;
}
}

View file

@ -1,205 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
/****************************************************************************
* Notes:
*
* This implementation makes use of a 16 bit fixed point version of two multiply
* constants:
* 1. sqrt(2) * cos (pi/8)
* 2. sqrt(2) * sin (pi/8)
* Because the first constant is bigger than 1, to maintain the same 16 bit
* fixed point precision as the second one, we use a trick of
* x * a = x + x*(a-1)
* so
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
**************************************************************************/
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
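/* Worked sketch of the trick above (hypothetical helpers): multiplying by
 * sqrt(2)*cos(pi/8) ~= 1.30656 becomes x + ((x * 20091) >> 16), since
 * 20091 / 65536 ~= 0.30656, while sqrt(2)*sin(pi/8) ~= 0.54120 fits the
 * 16 bit fraction directly as 35468 / 65536.
 */
static int mul_sqrt2_cospi8(int x)
{
    return x + ((x * cospi8sqrt2minus1) >> 16);
}
static int mul_sqrt2_sinpi8(int x)
{
    return (x * sinpi8sqrt2) >> 16;
}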
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride)
{
int i;
int r, c;
int a1, b1, c1, d1;
short output[16];
short *ip = input;
short *op = output;
int temp1, temp2;
int shortpitch = 4;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[8];
b1 = ip[0] - ip[8];
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
op[shortpitch*0] = a1 + d1;
op[shortpitch*3] = a1 - d1;
op[shortpitch*1] = b1 + c1;
op[shortpitch*2] = b1 - c1;
ip++;
op++;
}
ip = output;
op = output;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[2];
b1 = ip[0] - ip[2];
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
ip += shortpitch;
op += shortpitch;
}
ip = output;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
int a = ip[c] + pred_ptr[c] ;
if (a < 0)
a = 0;
if (a > 255)
a = 255;
dst_ptr[c] = (unsigned char) a ;
}
ip += 4;
dst_ptr += dst_stride;
pred_ptr += pred_stride;
}
}
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride)
{
int a1 = ((input_dc + 4) >> 3);
int r, c;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
int a = a1 + pred_ptr[c] ;
if (a < 0)
a = 0;
if (a > 255)
a = 255;
dst_ptr[c] = (unsigned char) a ;
}
dst_ptr += dst_stride;
pred_ptr += pred_stride;
}
}
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff)
{
short output[16];
int i;
int a1, b1, c1, d1;
int a2, b2, c2, d2;
short *ip = input;
short *op = output;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
op[0] = a1 + b1;
op[4] = c1 + d1;
op[8] = a1 - b1;
op[12] = d1 - c1;
ip++;
op++;
}
ip = output;
op = output;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[3];
b1 = ip[1] + ip[2];
c1 = ip[1] - ip[2];
d1 = ip[0] - ip[3];
a2 = a1 + b1;
b2 = c1 + d1;
c2 = a1 - b1;
d2 = d1 - c1;
op[0] = (a2 + 3) >> 3;
op[1] = (b2 + 3) >> 3;
op[2] = (c2 + 3) >> 3;
op[3] = (d2 + 3) >> 3;
ip += 4;
op += 4;
}
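/* Each of the 16 WHT outputs becomes the DC coefficient (slot 0) of one of
 * the macroblock's 16 luma blocks, hence the stride-16 scatter below.
 */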
for(i = 0; i < 16; i++)
{
mb_dqcoeff[i * 16] = output[i];
}
}
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff)
{
int i;
int a1;
a1 = ((input[0] + 3) >> 3);
for(i = 0; i < 16; i++)
{
mb_dqcoeff[i * 16] = a1;
}
}

View file

@ -1,70 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_INVTRANS_H_
#define VP8_COMMON_INVTRANS_H_
#include "./vpx_config.h"
#include "vp8_rtcd.h"
#include "blockd.h"
#include "onyxc_int.h"
#if CONFIG_MULTITHREAD
#include "vpx_mem/vpx_mem.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
static void eob_adjust(char *eobs, short *diff)
{
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
int js;
for(js = 0; js < 16; js++)
{
if((eobs[js] == 0) && (diff[0] != 0))
eobs[js]++;
diff+=16;
}
}
static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd)
{
short *DQC = xd->dequant_y1;
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
vp8_short_inv_walsh4x4
(&xd->block[24].dqcoeff[0], xd->qcoeff);
}
else
{
vp8_short_inv_walsh4x4_1
(&xd->block[24].dqcoeff[0], xd->qcoeff);
}
eob_adjust(xd->eobs, xd->qcoeff);
DQC = xd->dequant_y1_dc;
}
vp8_dequant_idct_add_y_block
(xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
}
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_INVTRANS_H_

View file

@ -1,113 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_LOOPFILTER_H_
#define VP8_COMMON_LOOPFILTER_H_
#include "vpx_ports/mem.h"
#include "vpx_config.h"
#include "vp8_rtcd.h"
#ifdef __cplusplus
extern "C" {
#endif
#define MAX_LOOP_FILTER 63
/* fraction of total macroblock rows to be used in fast filter level picking */
/* has to be > 2 */
#define PARTIAL_FRAME_FRACTION 8
typedef enum
{
NORMAL_LOOPFILTER = 0,
SIMPLE_LOOPFILTER = 1
} LOOPFILTERTYPE;
#if ARCH_ARM
#define SIMD_WIDTH 1
#else
#define SIMD_WIDTH 16
#endif
/* Need to align this structure so when it is declared and
* passed it can be loaded into vector registers.
*/
typedef struct
{
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
unsigned char lvl[4][4][4];
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
unsigned char mode_lf_lut[10];
} loop_filter_info_n;
typedef struct loop_filter_info
{
const unsigned char * mblim;
const unsigned char * blim;
const unsigned char * lim;
const unsigned char * hev_thr;
} loop_filter_info;
typedef void loop_filter_uvfunction
(
unsigned char *u, /* source pointer */
int p, /* pitch */
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
unsigned char *v
);
/* assorted loopfilter functions which get used elsewhere */
struct VP8Common;
struct macroblockd;
struct modeinfo;
void vp8_loop_filter_init(struct VP8Common *cm);
void vp8_loop_filter_frame_init(struct VP8Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
int frame_type);
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl);
void vp8_loop_filter_row_normal(struct VP8Common *cm,
struct modeinfo *mode_info_context,
int mb_row, int post_ystride, int post_uvstride,
unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr);
void vp8_loop_filter_row_simple(struct VP8Common *cm,
struct modeinfo *mode_info_context,
int mb_row, int post_ystride, int post_uvstride,
unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_LOOPFILTER_H_

View file

@ -1,430 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "loopfilter.h"
#include "onyxc_int.h"
typedef unsigned char uc;
static signed char vp8_signed_char_clamp(int t)
{
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
return (signed char) t;
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static signed char vp8_filter_mask(uc limit, uc blimit,
uc p3, uc p2, uc p1, uc p0,
uc q0, uc q1, uc q2, uc q3)
{
signed char mask = 0;
mask |= (abs(p3 - p2) > limit);
mask |= (abs(p2 - p1) > limit);
mask |= (abs(p1 - p0) > limit);
mask |= (abs(q1 - q0) > limit);
mask |= (abs(q2 - q1) > limit);
mask |= (abs(q3 - q2) > limit);
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
return mask - 1;
}
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
hev |= (abs(q1 - q0) > thresh) * -1;
return hev;
}
static void vp8_filter(signed char mask, uc hev, uc *op1,
uc *op0, uc *oq0, uc *oq1)
{
signed char ps0, qs0;
signed char ps1, qs1;
signed char filter_value, Filter1, Filter2;
signed char u;
ps1 = (signed char) * op1 ^ 0x80;
ps0 = (signed char) * op0 ^ 0x80;
qs0 = (signed char) * oq0 ^ 0x80;
qs1 = (signed char) * oq1 ^ 0x80;
/* add outer taps if we have high edge variance */
filter_value = vp8_signed_char_clamp(ps1 - qs1);
filter_value &= hev;
/* inner taps */
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
filter_value &= mask;
/* save bottom 3 bits so that we round one side +4 and the other +3
* if it equals 4 we'll set to adjust by -1 to account for the fact
* we'd round 3 the other way
*/
Filter1 = vp8_signed_char_clamp(filter_value + 4);
Filter2 = vp8_signed_char_clamp(filter_value + 3);
Filter1 >>= 3;
Filter2 >>= 3;
u = vp8_signed_char_clamp(qs0 - Filter1);
*oq0 = u ^ 0x80;
u = vp8_signed_char_clamp(ps0 + Filter2);
*op0 = u ^ 0x80;
filter_value = Filter1;
/* outer tap adjustments */
filter_value += 1;
filter_value >>= 1;
filter_value &= ~hev;
u = vp8_signed_char_clamp(qs1 - filter_value);
*oq1 = u ^ 0x80;
u = vp8_signed_char_clamp(ps1 + filter_value);
*op1 = u ^ 0x80;
}
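/* Rounding sketch for the +4/+3 split above: with filter_value = 4,
 * Filter1 = (4 + 4) >> 3 = 1 moves q0 while Filter2 = (4 + 3) >> 3 = 0
 * leaves p0, so the two sides round in opposite directions and their
 * combined adjustment stays close to filter_value / 4.
 */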
void vp8_loop_filter_horizontal_edge_c
(
unsigned char *s,
int p, /* pitch */
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
while (++i < count * 8);
}
void vp8_loop_filter_vertical_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
s += p;
}
while (++i < count * 8);
}
static void vp8_mbfilter(signed char mask, uc hev,
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
{
signed char s, u;
signed char filter_value, Filter1, Filter2;
signed char ps2 = (signed char) * op2 ^ 0x80;
signed char ps1 = (signed char) * op1 ^ 0x80;
signed char ps0 = (signed char) * op0 ^ 0x80;
signed char qs0 = (signed char) * oq0 ^ 0x80;
signed char qs1 = (signed char) * oq1 ^ 0x80;
signed char qs2 = (signed char) * oq2 ^ 0x80;
/* add outer taps if we have high edge variance */
filter_value = vp8_signed_char_clamp(ps1 - qs1);
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
filter_value &= mask;
Filter2 = filter_value;
Filter2 &= hev;
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
Filter1 >>= 3;
Filter2 >>= 3;
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
/* only apply wider filter if not high edge variance */
filter_value &= ~hev;
Filter2 = filter_value;
/* roughly 3/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
s = vp8_signed_char_clamp(qs0 - u);
*oq0 = s ^ 0x80;
s = vp8_signed_char_clamp(ps0 + u);
*op0 = s ^ 0x80;
/* roughly 2/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
s = vp8_signed_char_clamp(qs1 - u);
*oq1 = s ^ 0x80;
s = vp8_signed_char_clamp(ps1 + u);
*op1 = s ^ 0x80;
/* roughly 1/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
s = vp8_signed_char_clamp(qs2 - u);
*oq2 = s ^ 0x80;
s = vp8_signed_char_clamp(ps2 + u);
*op2 = s ^ 0x80;
}
void vp8_mbloop_filter_horizontal_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
++s;
}
while (++i < count * 8);
}
void vp8_mbloop_filter_vertical_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
do
{
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
s += p;
}
while (++i < count * 8);
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
{
/* Why does this cause problems for win32?
* error C2143: syntax error : missing ';' before 'type'
* (void) limit;
*/
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
return mask;
}
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
{
signed char filter_value, Filter1, Filter2;
signed char p1 = (signed char) * op1 ^ 0x80;
signed char p0 = (signed char) * op0 ^ 0x80;
signed char q0 = (signed char) * oq0 ^ 0x80;
signed char q1 = (signed char) * oq1 ^ 0x80;
signed char u;
filter_value = vp8_signed_char_clamp(p1 - q1);
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
filter_value &= mask;
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(filter_value + 4);
Filter1 >>= 3;
u = vp8_signed_char_clamp(q0 - Filter1);
*oq0 = u ^ 0x80;
Filter2 = vp8_signed_char_clamp(filter_value + 3);
Filter2 >>= 3;
u = vp8_signed_char_clamp(p0 + Filter2);
*op0 = u ^ 0x80;
}
void vp8_loop_filter_simple_horizontal_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
do
{
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
while (++i < 16);
}
void vp8_loop_filter_simple_vertical_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
do
{
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
s += p;
}
while (++i < 16);
}
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
}

View file

@ -1,68 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "blockd.h"
void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
int r, c;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
}
}
for (r = 0; r < 2; r++)
{
for (c = 0; c < 2; c++)
{
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
}
}
for (r = 0; r < 2; r++)
{
for (c = 0; c < 2; c++)
{
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
}
}
for (r = 0; r < 25; r++)
{
x->block[r].qcoeff = x->qcoeff + r * 16;
x->block[r].dqcoeff = x->dqcoeff + r * 16;
x->block[r].eob = x->eobs + r;
}
}
void vp8_build_block_doffsets(MACROBLOCKD *x)
{
int block;
for (block = 0; block < 16; block++) /* y blocks */
{
x->block[block].offset =
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
}
for (block = 16; block < 20; block++) /* U and V blocks */
{
x->block[block+4].offset =
x->block[block].offset =
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
}
}

View file

@ -1,386 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* MFQE: Multiframe Quality Enhancement
* In rate limited situations keyframes may cause significant visual artifacts
* commonly referred to as "popping." This file implements a postprocessing
* algorithm which blends data from the preceding frame when there is no
* motion and the q from the previous frame is lower, which indicates that it
* is of higher quality.
*/
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/postproc.h"
#include "vpx_dsp/variance.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/yv12config.h"
#include <limits.h>
#include <stdlib.h>
static void filter_by_weight(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int block_size, int src_weight)
{
int dst_weight = (1 << MFQE_PRECISION) - src_weight;
int rounding_bit = 1 << (MFQE_PRECISION - 1);
int r, c;
for (r = 0; r < block_size; r++)
{
for (c = 0; c < block_size; c++)
{
dst[c] = (src[c] * src_weight +
dst[c] * dst_weight +
rounding_bit) >> MFQE_PRECISION;
}
src += src_stride;
dst += dst_stride;
}
}
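/* Blending sketch: src_weight is a fixed point fraction with MFQE_PRECISION
 * fractional bits, so src_weight + dst_weight == 1 << MFQE_PRECISION. If,
 * say, MFQE_PRECISION == 4 and src_weight == 10, each pixel becomes
 *   dst[c] = (src[c] * 10 + dst[c] * 6 + 8) >> 4;
 * i.e. 10/16 of the current frame blended with 6/16 of the previous one.
 */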
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int src_weight)
{
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int src_weight)
{
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int src_weight)
{
filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
}
static void apply_ifactor(unsigned char *y_src,
int y_src_stride,
unsigned char *y_dst,
int y_dst_stride,
unsigned char *u_src,
unsigned char *v_src,
int uv_src_stride,
unsigned char *u_dst,
unsigned char *v_dst,
int uv_dst_stride,
int block_size,
int src_weight)
{
if (block_size == 16)
{
vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
}
else /* if (block_size == 8) */
{
vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
}
}
static unsigned int int_sqrt(unsigned int x)
{
unsigned int y = x;
unsigned int guess;
int p = 1;
while (y>>=1) p++;
p>>=1;
guess=0;
while (p>=0)
{
guess |= (1<<p);
if (x<guess*guess)
guess -= (1<<p);
p--;
}
/* round to nearest: choose guess+1 when x >= guess*guess + guess + 1 */
return guess+(guess*guess+guess+1<=x);
}
#define USE_SSD
static void multiframe_quality_enhance_block
(
int blksize, /* Currently only values supported are 16, 8 */
int qcurr,
int qprev,
unsigned char *y,
unsigned char *u,
unsigned char *v,
int y_stride,
int uv_stride,
unsigned char *yd,
unsigned char *ud,
unsigned char *vd,
int yd_stride,
int uvd_stride
)
{
static const unsigned char VP8_ZEROS[16]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
int uvblksize = blksize >> 1;
int qdiff = qcurr - qprev;
int i;
unsigned char *up;
unsigned char *udp;
unsigned char *vp;
unsigned char *vdp;
unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
if (blksize == 16)
{
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 128)>>8;
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 32)>>6;
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 32)>>6;
#else
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
#endif
}
else /* if (blksize == 8) */
{
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 32)>>6;
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 8)>>4;
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 8)>>4;
#else
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
#endif
}
actrisk = (actd > act * 5);
/* thr = qdiff/16 + log2(actd) + log4(qprev) */
thr = (qdiff >> 4);
while (actd >>= 1) thr++;
while (qprev >>= 2) thr++;
#ifdef USE_SSD
thrsq = thr * thr;
if (sad < thrsq &&
/* additional checks for color mismatch and excessive addition of
* high-frequencies */
4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
#else
if (sad < thr &&
/* additional checks for color mismatch and excessive addition of
* high-frequencies */
2 * usad < thr && 2 * vsad < thr && !actrisk)
#endif
{
int ifactor;
#ifdef USE_SSD
/* TODO: optimize this later to not need square root */
sad = int_sqrt(sad);
#endif
ifactor = (sad << MFQE_PRECISION) / thr;
ifactor >>= (qdiff >> 5);
if (ifactor)
{
apply_ifactor(y, y_stride, yd, yd_stride,
u, v, uv_stride,
ud, vd, uvd_stride,
blksize, ifactor);
}
}
else /* else implicitly copy from previous frame */
{
if (blksize == 16)
{
vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
}
else /* if (blksize == 8) */
{
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
memcpy(udp, up, uvblksize);
for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
memcpy(vdp, vp, uvblksize);
}
}
}
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
{
if (mode_info_context->mbmi.mb_skip_coeff)
map[0] = map[1] = map[2] = map[3] = 1;
else if (mode_info_context->mbmi.mode==SPLITMV)
{
static const int ndx[4][4] =
{
{0, 1, 4, 5},
{2, 3, 6, 7},
{8, 9, 12, 13},
{10, 11, 14, 15}
};
int i, j;
for (i=0; i<4; ++i)
{
map[i] = 1;
for (j=0; j<4 && map[j]; ++j)
map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
}
}
else
{
map[0] = map[1] = map[2] = map[3] =
(mode_info_context->mbmi.mode > B_PRED &&
abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
}
return (map[0]+map[1]+map[2]+map[3]);
}
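/* map[] flags, one per 8x8 quadrant of the macroblock, whether motion is
 * small enough (roughly |mv| <= 2, or a skipped macroblock) for enhancement;
 * the caller enhances the whole 16x16 when all four quadrants qualify and
 * falls back to per-8x8 processing otherwise. */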
void vp8_multiframe_quality_enhance
(
VP8_COMMON *cm
)
{
YV12_BUFFER_CONFIG *show = cm->frame_to_show;
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
FRAME_TYPE frame_type = cm->frame_type;
/* Point at the base of the macroblock MODE_INFO list, which holds motion vectors etc. */
const MODE_INFO *mode_info_context = cm->show_frame_mi;
int mb_row;
int mb_col;
int totmap, map[4];
int qcurr = cm->base_qindex;
int qprev = cm->postproc_state.last_base_qindex;
unsigned char *y_ptr, *u_ptr, *v_ptr;
unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
/* Set up the buffer pointers */
y_ptr = show->y_buffer;
u_ptr = show->u_buffer;
v_ptr = show->v_buffer;
yd_ptr = dest->y_buffer;
ud_ptr = dest->u_buffer;
vd_ptr = dest->v_buffer;
/* postprocess each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
/* if motion is high there will likely be no benefit */
if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
else totmap = (frame_type == KEY_FRAME ? 4 : 0);
if (totmap)
{
if (totmap < 4)
{
int i, j;
for (i=0; i<2; ++i)
for (j=0; j<2; ++j)
{
if (map[i*2+j])
{
multiframe_quality_enhance_block(8, qcurr, qprev,
y_ptr + 8*(i*show->y_stride+j),
u_ptr + 4*(i*show->uv_stride+j),
v_ptr + 4*(i*show->uv_stride+j),
show->y_stride,
show->uv_stride,
yd_ptr + 8*(i*dest->y_stride+j),
ud_ptr + 4*(i*dest->uv_stride+j),
vd_ptr + 4*(i*dest->uv_stride+j),
dest->y_stride,
dest->uv_stride);
}
else
{
/* copy a 8x8 block */
int k;
unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
vp += show->uv_stride, vdp += dest->uv_stride)
{
memcpy(udp, up, 4);
memcpy(vdp, vp, 4);
}
}
}
}
else /* totmap = 4 */
{
multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
u_ptr, v_ptr,
show->y_stride,
show->uv_stride,
yd_ptr, ud_ptr, vd_ptr,
dest->y_stride,
dest->uv_stride);
}
}
else
{
vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
}
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
yd_ptr += 16;
ud_ptr += 8;
vd_ptr += 8;
mode_info_context++; /* step to next MB */
}
y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
mode_info_context++; /* Skip border mb */
}
}

View file

@ -1,33 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
void vp8_dequant_idct_add_dspr2(short *input, short *dq,
unsigned char *dest, int stride)
{
int i;
for (i = 0; i < 16; i++)
{
input[i] = dq[i] * input[i];
}
vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
memset(input, 0, 32);
}
#endif

Diff not shown because of its large size.

View file

@ -1,88 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#if HAVE_DSPR2
void vp8_dequant_idct_add_y_block_dspr2
(short *q, short *dq,
unsigned char *dst, int stride, char *eobs)
{
int i, j;
for (i = 0; i < 4; i++)
{
for (j = 0; j < 4; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
else
{
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride);
((int *)q)[0] = 0;
}
q += 16;
dst += 4;
}
dst += 4 * stride - 16;
}
}
void vp8_dequant_idct_add_uv_block_dspr2
(short *q, short *dq,
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
{
int i, j;
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
else
{
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride);
((int *)q)[0] = 0;
}
q += 16;
dstu += 4;
}
dstu += 4 * stride - 8;
}
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
else
{
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride);
((int *)q)[0] = 0;
}
q += 16;
dstv += 4;
}
dstv += 4 * stride - 8;
}
}
#endif

View file

@ -1,369 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp8_rtcd.h"
#if HAVE_DSPR2
#define CROP_WIDTH 256
/******************************************************************************
* Notes:
*
* This implementation makes use of 16 bit fixed point version of two multiply
* constants:
* 1. sqrt(2) * cos (pi/8)
* 2. sqrt(2) * sin (pi/8)
* Since the first constant is bigger than 1, to maintain the same 16 bit
* fixed point precision as the second one, we use a trick of
* x * a = x + x*(a-1)
* so
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
****************************************************************************/
extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
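/* As a numeric check of the note above: sqrt(2) * cos(pi/8) ~= 1.306563,
 * and 0.306563 * 65536 ~= 20091; sqrt(2) * sin(pi/8) ~= 0.541196, and
 * 0.541196 * 65536 ~= 35468. */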
inline void prefetch_load_short(short *src)
{
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
: [src] "r" (src)
);
}
void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride)
{
int r, c;
int a1, b1, c1, d1;
short output[16];
short *ip = input;
short *op = output;
int temp1, temp2;
int shortpitch = 4;
int c2, d2;
int temp3, temp4;
unsigned char *cm = ff_cropTbl + CROP_WIDTH;
/* prepare data for load */
prefetch_load_short(ip + 8);
/* first loop is unrolled */
a1 = ip[0] + ip[8];
b1 = ip[0] - ip[8];
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[13] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[0] = a1 + d1;
op[12] = a1 - d1;
op[4] = b1 + c1;
op[8] = b1 - c1;
a1 = ip[1] + ip[9];
b1 = ip[1] - ip[9];
op[1] = a1 + d2;
op[13] = a1 - d2;
op[5] = b1 + c2;
op[9] = b1 - c2;
a1 = ip[2] + ip[10];
b1 = ip[2] - ip[10];
temp1 = (ip[6] * sinpi8sqrt2) >> 16;
temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[14] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[7] * sinpi8sqrt2) >> 16;
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[2] = a1 + d1;
op[14] = a1 - d1;
op[6] = b1 + c1;
op[10] = b1 - c1;
a1 = ip[3] + ip[11];
b1 = ip[3] - ip[11];
op[3] = a1 + d2;
op[15] = a1 - d2;
op[7] = b1 + c2;
op[11] = b1 - c2;
ip = output;
/* prepare data for load */
prefetch_load_short(ip + shortpitch);
/* second loop is unrolled */
a1 = ip[0] + ip[2];
b1 = ip[0] - ip[2];
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[7] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
a1 = ip[4] + ip[6];
b1 = ip[4] - ip[6];
op[4] = (a1 + d2 + 4) >> 3;
op[7] = (a1 - d2 + 4) >> 3;
op[5] = (b1 + c2 + 4) >> 3;
op[6] = (b1 - c2 + 4) >> 3;
a1 = ip[8] + ip[10];
b1 = ip[8] - ip[10];
temp1 = (ip[9] * sinpi8sqrt2) >> 16;
temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[11] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[13] * sinpi8sqrt2) >> 16;
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[8] = (a1 + d1 + 4) >> 3;
op[11] = (a1 - d1 + 4) >> 3;
op[9] = (b1 + c1 + 4) >> 3;
op[10] = (b1 - c1 + 4) >> 3;
a1 = ip[12] + ip[14];
b1 = ip[12] - ip[14];
op[12] = (a1 + d2 + 4) >> 3;
op[15] = (a1 - d2 + 4) >> 3;
op[13] = (b1 + c2 + 4) >> 3;
op[14] = (b1 - c2 + 4) >> 3;
ip = output;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
short a = ip[c] + pred_ptr[c] ;
dst_ptr[c] = cm[a] ;
}
ip += 4;
dst_ptr += dst_stride;
pred_ptr += pred_stride;
}
}
void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride)
{
int a1;
int i, absa1;
int t2, vector_a1, vector_a;
/* a1 = ((input_dc + 4) >> 3); */
__asm__ __volatile__ (
"addi %[a1], %[input_dc], 4 \n\t"
"sra %[a1], %[a1], 3 \n\t"
: [a1] "=r" (a1)
: [input_dc] "r" (input_dc)
);
if (a1 < 0)
{
/* use quad-byte
* input and output memory are four byte aligned
*/
__asm__ __volatile__ (
"abs %[absa1], %[a1] \n\t"
"replv.qb %[vector_a1], %[absa1] \n\t"
: [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
: [a1] "r" (a1)
);
/* use (a1 - pred_ptr[c]) instead of (a1 + pred_ptr[c]) */
for (i = 4; i--;)
{
__asm__ __volatile__ (
"lw %[t2], 0(%[pred_ptr]) \n\t"
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
"subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
);
}
}
else
{
/* use quad-byte
* input and output memory are four byte aligned
*/
__asm__ __volatile__ (
"replv.qb %[vector_a1], %[a1] \n\t"
: [vector_a1] "=r" (vector_a1)
: [a1] "r" (a1)
);
for (i = 4; i--;)
{
__asm__ __volatile__ (
"lw %[t2], 0(%[pred_ptr]) \n\t"
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
"addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
);
}
}
}
void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff)
{
short output[16];
int i;
int a1, b1, c1, d1;
int a2, b2, c2, d2;
short *ip = input;
short *op = output;
prefetch_load_short(ip);
for (i = 4; i--;)
{
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
op[0] = a1 + b1;
op[4] = c1 + d1;
op[8] = a1 - b1;
op[12] = d1 - c1;
ip++;
op++;
}
ip = output;
op = output;
prefetch_load_short(ip);
for (i = 4; i--;)
{
a1 = ip[0] + ip[3] + 3;
b1 = ip[1] + ip[2];
c1 = ip[1] - ip[2];
d1 = ip[0] - ip[3] + 3;
a2 = a1 + b1;
b2 = d1 + c1;
c2 = a1 - b1;
d2 = d1 - c1;
op[0] = a2 >> 3;
op[1] = b2 >> 3;
op[2] = c2 >> 3;
op[3] = d2 >> 3;
ip += 4;
op += 4;
}
for (i = 0; i < 16; i++)
{
mb_dqcoeff[i * 16] = output[i];
}
}
void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff)
{
int a1;
a1 = ((input[0] + 3) >> 3);
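/* the 16 stores below write a1 to every 16th short of mb_dqcoeff (byte
 * offsets stepping by 32), i.e. to the DC position of each of the 16
 * 4x4 blocks in the macroblock */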
__asm__ __volatile__ (
"sh %[a1], 0(%[mb_dqcoeff]) \n\t"
"sh %[a1], 32(%[mb_dqcoeff]) \n\t"
"sh %[a1], 64(%[mb_dqcoeff]) \n\t"
"sh %[a1], 96(%[mb_dqcoeff]) \n\t"
"sh %[a1], 128(%[mb_dqcoeff]) \n\t"
"sh %[a1], 160(%[mb_dqcoeff]) \n\t"
"sh %[a1], 192(%[mb_dqcoeff]) \n\t"
"sh %[a1], 224(%[mb_dqcoeff]) \n\t"
"sh %[a1], 256(%[mb_dqcoeff]) \n\t"
"sh %[a1], 288(%[mb_dqcoeff]) \n\t"
"sh %[a1], 320(%[mb_dqcoeff]) \n\t"
"sh %[a1], 352(%[mb_dqcoeff]) \n\t"
"sh %[a1], 384(%[mb_dqcoeff]) \n\t"
"sh %[a1], 416(%[mb_dqcoeff]) \n\t"
"sh %[a1], 448(%[mb_dqcoeff]) \n\t"
"sh %[a1], 480(%[mb_dqcoeff]) \n\t"
:
: [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff)
);
}
#endif

View file

@ -1,121 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#if HAVE_DSPR2
inline void prefetch_load_int(unsigned char *src)
{
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
: [src] "r" (src)
);
}
__inline void vp8_copy_mem16x16_dspr2(
unsigned char *RESTRICT src,
int src_stride,
unsigned char *RESTRICT dst,
int dst_stride)
{
int r;
unsigned int a0, a1, a2, a3;
for (r = 16; r--;)
{
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
/* use unaligned memory load and store */
__asm__ __volatile__ (
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"ulw %[a2], 8(%[src]) \n\t"
"ulw %[a3], 12(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
"sw %[a2], 8(%[dst]) \n\t"
"sw %[a3], 12(%[dst]) \n\t"
: [a0] "=&r" (a0), [a1] "=&r" (a1),
[a2] "=&r" (a2), [a3] "=&r" (a3)
: [src] "r" (src), [dst] "r" (dst)
);
src += src_stride;
dst += dst_stride;
}
}
__inline void vp8_copy_mem8x8_dspr2(
unsigned char *RESTRICT src,
int src_stride,
unsigned char *RESTRICT dst,
int dst_stride)
{
int r;
unsigned int a0, a1;
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
for (r = 8; r--;)
{
/* use unaligned memory load and store */
__asm__ __volatile__ (
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
: [a0] "=&r" (a0), [a1] "=&r" (a1)
: [src] "r" (src), [dst] "r" (dst)
);
src += src_stride;
dst += dst_stride;
}
}
__inline void vp8_copy_mem8x4_dspr2(
unsigned char *RESTRICT src,
int src_stride,
unsigned char *RESTRICT dst,
int dst_stride)
{
int r;
unsigned int a0, a1;
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
for (r = 4; r--;)
{
/* use unaligned memory load and store */
__asm__ __volatile__ (
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
: [a0] "=&r" (a0), [a1] "=&r" (a1)
: [src] "r" (src), [dst] "r" (dst)
);
src += src_stride;
dst += dst_stride;
}
}
#endif

Diff not shown because of its large size.

View file

@ -1,911 +0,0 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vpx_ports/mem.h"
#include "vp8/common/filter.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
DECLARE_ALIGNED(16, static const int8_t, vp8_bilinear_filters_msa[7][2]) =
{
{ 112, 16 },
{ 96, 32 },
{ 80, 48 },
{ 64, 64 },
{ 48, 80 },
{ 32, 96 },
{ 16, 112 }
};
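/* Each row is a two-tap pair summing to 128 (1 << VP8_FILTER_SHIFT, with
 * VP8_FILTER_SHIFT == 7 as in vp8/common/filter.h), selected by
 * subpel offset - 1; a zero offset takes the unfiltered copy paths in the
 * predictors below instead. */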
static const uint8_t vp8_mc_filt_mask_arr[16 * 3] =
{
/* 8 width cases */
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
/* 4 width cases */
0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
/* 4 width cases */
8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
};
static void common_hz_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter)
{
v16i8 src0, src1, src2, src3, mask;
v16u8 filt0, vec0, vec1, res0, res1;
v8u16 vec2, vec3, filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
filt = LD_UH(filter);
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
LD_SB4(src, src_stride, src0, src1, src2, src3);
VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
SRARI_H2_UH(vec2, vec3, VP8_FILTER_SHIFT);
PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
}
static void common_hz_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter)
{
v16u8 vec0, vec1, vec2, vec3, filt0;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
v16i8 res0, res1, res2, res3;
v8u16 vec4, vec5, vec6, vec7, filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
filt = LD_UH(filter);
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
vec4, vec5, vec6, vec7);
SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT);
PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
res0, res1, res2, res3);
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
dst += (4 * dst_stride);
ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
}
static void common_hz_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
if (4 == height)
{
common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
}
else if (8 == height)
{
common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
}
}
static void common_hz_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter)
{
v16u8 filt0;
v16i8 src0, src1, src2, src3, mask;
v8u16 vec0, vec1, vec2, vec3, filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
filt = LD_UH(filter);
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
LD_SB4(src, src_stride, src0, src1, src2, src3);
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
vec0, vec1, vec2, vec3);
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
ST8x4_UB(src0, src1, dst, dst_stride);
}
static void common_hz_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
v16u8 filt0;
v16i8 src0, src1, src2, src3, mask, out0, out1;
v8u16 vec0, vec1, vec2, vec3, filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
filt = LD_UH(filter);
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
vec0, vec1, vec2, vec3);
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
vec0, vec1, vec2, vec3);
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
if (16 == height)
{
LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
vec0, vec1, vec2, vec3);
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
vec0, vec1, vec2, vec3);
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
}
}
static void common_hz_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
if (4 == height)
{
common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
}
else
{
common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
}
}
static void common_hz_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
loop_cnt = (height >> 2) - 1;
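/* the first group of four rows is filtered before the loop, hence the
 * count of height/4 - 1 */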
filt = LD_UH(filter);
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
LD_SB4(src, src_stride, src0, src2, src4, src6);
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
src += (4 * src_stride);
VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
out0, out1, out2, out3);
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
out4, out5, out6, out7);
SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT);
SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT);
PCKEV_ST_SB(out0, out1, dst);
dst += dst_stride;
PCKEV_ST_SB(out2, out3, dst);
dst += dst_stride;
PCKEV_ST_SB(out4, out5, dst);
dst += dst_stride;
PCKEV_ST_SB(out6, out7, dst);
dst += dst_stride;
for (; loop_cnt--;)
{
LD_SB4(src, src_stride, src0, src2, src4, src6);
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
src += (4 * src_stride);
VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
out0, out1, out2, out3);
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
out4, out5, out6, out7);
SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT);
SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT);
PCKEV_ST_SB(out0, out1, dst);
dst += dst_stride;
PCKEV_ST_SB(out2, out3, dst);
dst += dst_stride;
PCKEV_ST_SB(out4, out5, dst);
dst += dst_stride;
PCKEV_ST_SB(out6, out7, dst);
dst += dst_stride;
}
}
static void common_vt_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter)
{
v16i8 src0, src1, src2, src3, src4;
v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
v16u8 filt0;
v8i16 filt;
v8u16 tmp0, tmp1;
filt = LD_SH(filter);
filt0 = (v16u8)__msa_splati_h(filt, 0);
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
src += (5 * src_stride);
ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
src10_r, src21_r, src32_r, src43_r);
ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
}
static void common_vt_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter)
{
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
v8u16 tmp0, tmp1, tmp2, tmp3;
v16u8 filt0;
v8i16 filt;
filt = LD_SH(filter);
filt0 = (v16u8)__msa_splati_h(filt, 0);
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
src8 = LD_SB(src);
src += src_stride;
ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
src32_r, src43_r);
ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
src76_r, src87_r);
ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
src87_r, src76_r, src2110, src4332, src6554, src8776);
DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
tmp0, tmp1, tmp2, tmp3);
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
}
static void common_vt_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
if (4 == height)
{
common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
}
else if (8 == height)
{
common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
}
}
static void common_vt_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter)
{
v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
v16i8 out0, out1;
v8u16 tmp0, tmp1, tmp2, tmp3;
v8i16 filt;
filt = LD_SH(filter);
filt0 = (v16u8)__msa_splati_h(filt, 0);
LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
tmp0, tmp1, tmp2, tmp3);
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
}
static void common_vt_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
v16i8 out0, out1;
v8u16 tmp0, tmp1, tmp2, tmp3;
v8i16 filt;
filt = LD_SH(filter);
filt0 = (v16u8)__msa_splati_h(filt, 0);
src0 = LD_UB(src);
src += src_stride;
for (loop_cnt = (height >> 3); loop_cnt--;)
{
LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
src += (8 * src_stride);
ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
vec0, vec1, vec2, vec3);
ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
vec4, vec5, vec6, vec7);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
tmp0, tmp1, tmp2, tmp3);
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
tmp0, tmp1, tmp2, tmp3);
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
src0 = src8;
}
}
static void common_vt_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
if (4 == height)
{
common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
}
else
{
common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter,
height);
}
}
static void common_vt_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16u8 src0, src1, src2, src3, src4;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
v8u16 tmp0, tmp1, tmp2, tmp3;
v8i16 filt;
filt = LD_SH(filter);
filt0 = (v16u8)__msa_splati_h(filt, 0);
src0 = LD_UB(src);
src += src_stride;
for (loop_cnt = (height >> 2); loop_cnt--;)
{
LD_UB4(src, src_stride, src1, src2, src3, src4);
src += (4 * src_stride);
ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp0, tmp1, dst);
dst += dst_stride;
ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp2, tmp3, dst);
dst += dst_stride;
DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp0, tmp1, dst);
dst += dst_stride;
DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp2, tmp3, dst);
dst += dst_stride;
src0 = src4;
}
}
static void common_hv_2ht_2vt_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter_horiz,
const int8_t *filter_vert)
{
v16i8 src0, src1, src2, src3, src4, mask;
v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1;
v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1;
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
filt = LD_UH(filter_horiz);
filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
filt = LD_UH(filter_vert);
filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
}
static void common_hv_2ht_2vt_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter_horiz,
const int8_t *filter_vert)
{
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
v16i8 res0, res1, res2, res3;
v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
filt = LD_UH(filter_horiz);
filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
filt = LD_UH(filter_vert);
filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
src8 = LD_SB(src);
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, VP8_FILTER_SHIFT);
SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
hz_out3, hz_out5, 8);
hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6);
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
vec4, vec5, vec6, vec7);
SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT);
PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
res0, res1, res2, res3);
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
dst += (4 * dst_stride);
ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
}
static void common_hv_2ht_2vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter_horiz,
const int8_t *filter_vert,
int32_t height)
{
if (4 == height)
{
common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
filter_horiz, filter_vert);
}
else if (8 == height)
{
common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
filter_horiz, filter_vert);
}
}
static void common_hv_2ht_2vt_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter_horiz,
const int8_t *filter_vert)
{
v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
v8i16 filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
filt = LD_SH(filter_horiz);
filt_hz = (v16u8)__msa_splati_h(filt, 0);
filt = LD_SH(filter_vert);
filt_vt = (v16u8)__msa_splati_h(filt, 0);
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
tmp0 = __msa_dotp_u_h(vec0, filt_vt);
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT);
vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
tmp1 = __msa_dotp_u_h(vec1, filt_vt);
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT);
vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
tmp2 = __msa_dotp_u_h(vec2, filt_vt);
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT);
vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
tmp3 = __msa_dotp_u_h(vec3, filt_vt);
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
}
static void common_hv_2ht_2vt_8x8mult_msa(uint8_t *RESTRICT src,
int32_t src_stride,
uint8_t *RESTRICT dst,
int32_t dst_stride,
const int8_t *filter_horiz,
const int8_t *filter_vert,
int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
v16u8 filt_hz, filt_vt, vec0;
v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
v8i16 filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
filt = LD_SH(filter_horiz);
filt_hz = (v16u8)__msa_splati_h(filt, 0);
filt = LD_SH(filter_vert);
filt_vt = (v16u8)__msa_splati_h(filt, 0);
src0 = LD_SB(src);
src += src_stride;
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
for (loop_cnt = (height >> 3); loop_cnt--;)
{
LD_SB4(src, src_stride, src1, src2, src3, src4);
src += (4 * src_stride);
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
tmp1 = __msa_dotp_u_h(vec0, filt_vt);
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
tmp2 = __msa_dotp_u_h(vec0, filt_vt);
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
tmp3 = __msa_dotp_u_h(vec0, filt_vt);
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
VP8_FILTER_SHIFT);
LD_SB4(src, src_stride, src1, src2, src3, src4);
src += (4 * src_stride);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
tmp4 = __msa_dotp_u_h(vec0, filt_vt);
SRARI_H2_UH(tmp3, tmp4, VP8_FILTER_SHIFT);
PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
tmp5 = __msa_dotp_u_h(vec0, filt_vt);
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
tmp6 = __msa_dotp_u_h(vec0, filt_vt);
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
tmp7 = __msa_dotp_u_h(vec0, filt_vt);
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
VP8_FILTER_SHIFT);
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
tmp8 = __msa_dotp_u_h(vec0, filt_vt);
SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, VP8_FILTER_SHIFT);
PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1);
ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
}
}
static void common_hv_2ht_2vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter_horiz,
const int8_t *filter_vert,
int32_t height)
{
if (4 == height)
{
common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride,
filter_horiz, filter_vert);
}
else
{
common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride,
filter_horiz, filter_vert, height);
}
}
static void common_hv_2ht_2vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
uint8_t *RESTRICT dst, int32_t dst_stride,
const int8_t *filter_horiz,
const int8_t *filter_vert,
int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
v16u8 filt_hz, filt_vt, vec0, vec1;
v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3;
v8i16 filt;
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
/* rearranging filter */
filt = LD_SH(filter_horiz);
filt_hz = (v16u8)__msa_splati_h(filt, 0);
filt = LD_SH(filter_vert);
filt_vt = (v16u8)__msa_splati_h(filt, 0);
LD_SB2(src, 8, src0, src1);
src += src_stride;
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT);
for (loop_cnt = (height >> 2); loop_cnt--;)
{
LD_SB4(src, src_stride, src0, src2, src4, src6);
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
src += (4 * src_stride);
hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz,
VP8_FILTER_SHIFT);
hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
VP8_FILTER_SHIFT);
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
VP8_FILTER_SHIFT);
hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
VP8_FILTER_SHIFT);
ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
VP8_FILTER_SHIFT);
hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz,
VP8_FILTER_SHIFT);
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz,
VP8_FILTER_SHIFT);
hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz,
VP8_FILTER_SHIFT);
ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
}
}
void vp8_bilinear_predict4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
int32_t xoffset, int32_t yoffset,
uint8_t *RESTRICT dst, int32_t dst_stride)
{
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
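/* for a zero offset the [offset - 1] pointer above is formed but never
 * dereferenced; the branches below fall back to vertical-only,
 * horizontal-only, or a plain copy */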
if (yoffset)
{
if (xoffset)
{
common_hv_2ht_2vt_4w_msa(src, src_stride, dst, dst_stride,
h_filter, v_filter, 4);
}
else
{
common_vt_2t_4w_msa(src, src_stride, dst, dst_stride, v_filter, 4);
}
}
else
{
if (xoffset)
{
common_hz_2t_4w_msa(src, src_stride, dst, dst_stride, h_filter, 4);
}
else
{
uint32_t tp0, tp1, tp2, tp3;
LW4(src, src_stride, tp0, tp1, tp2, tp3);
SW4(tp0, tp1, tp2, tp3, dst, dst_stride);
}
}
}
void vp8_bilinear_predict8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
int32_t xoffset, int32_t yoffset,
uint8_t *RESTRICT dst, int32_t dst_stride)
{
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
if (yoffset)
{
if (xoffset)
{
common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride,
h_filter, v_filter, 4);
}
else
{
common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 4);
}
}
else
{
if (xoffset)
{
common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 4);
}
else
{
vp8_copy_mem8x4(src, src_stride, dst, dst_stride);
}
}
}
void vp8_bilinear_predict8x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
int32_t xoffset, int32_t yoffset,
uint8_t *RESTRICT dst, int32_t dst_stride)
{
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
if (yoffset)
{
if (xoffset)
{
common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride,
h_filter, v_filter, 8);
}
else
{
common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 8);
}
}
else
{
if (xoffset)
{
common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 8);
}
else
{
vp8_copy_mem8x8(src, src_stride, dst, dst_stride);
}
}
}
void vp8_bilinear_predict16x16_msa(uint8_t *RESTRICT src, int32_t src_stride,
int32_t xoffset, int32_t yoffset,
uint8_t *RESTRICT dst, int32_t dst_stride)
{
const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];
if (yoffset)
{
if (xoffset)
{
common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride,
h_filter, v_filter, 16);
}
else
{
common_vt_2t_16w_msa(src, src_stride, dst, dst_stride, v_filter,
16);
}
}
else
{
if (xoffset)
{
common_hz_2t_16w_msa(src, src_stride, dst, dst_stride, h_filter,
16);
}
else
{
vp8_copy_mem16x16(src, src_stride, dst, dst_stride);
}
}
}

View file

@ -1,70 +0,0 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static void copy_8x4_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
uint64_t src0, src1, src2, src3;
LD4(src, src_stride, src0, src1, src2, src3);
SD4(src0, src1, src2, src3, dst, dst_stride);
}
static void copy_8x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
uint64_t src0, src1, src2, src3;
LD4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
SD4(src0, src1, src2, src3, dst, dst_stride);
dst += (4 * dst_stride);
LD4(src, src_stride, src0, src1, src2, src3);
SD4(src0, src1, src2, src3, dst, dst_stride);
}
static void copy_16x16_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14,
src15);
ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
dst += (8 * dst_stride);
ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst,
dst_stride);
}
void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
copy_16x16_msa(src, src_stride, dst, dst_stride);
}
void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
copy_8x8_msa(src, src_stride, dst, dst_stride);
}
void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
copy_8x4_msa(src, src_stride, dst, dst_stride);
}

View file

@ -1,457 +0,0 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static const int32_t cospi8sqrt2minus1 = 20091;
static const int32_t sinpi8sqrt2 = 35468;
#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v8i16 s4_m, s5_m, s6_m, s7_m; \
\
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \
out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \
out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \
}
#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
({ \
v8i16 out_m; \
v8i16 zero_m = { 0 }; \
v4i32 tmp1_m, tmp2_m; \
v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
\
ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \
tmp1_m >>= 16; \
tmp2_m >>= 16; \
tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \
tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \
out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \
\
out_m; \
})
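/* The macro above widens each signed 16-bit lane of 'in' to 32 bits
 * (interleave with zero, then an arithmetic >> 16), multiplies by the Q16
 * constant sinpi8sqrt2, shifts right by 16, and packs the eight results
 * back to halfwords: a per-lane (in * sinpi8sqrt2) >> 16. */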
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v8i16 a1_m, b1_m, c1_m, d1_m; \
v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
v8i16 const_cospi8sqrt2minus1_m; \
\
const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \
c_tmp2_m = c_tmp2_m >> 1; \
c_tmp2_m = in3 + c_tmp2_m; \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \
d_tmp1_m = d_tmp1_m >> 1; \
d_tmp1_m = in1 + d_tmp1_m; \
d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v4i32 a1_m, b1_m, c1_m, d1_m; \
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
\
const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \
sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \
c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \
d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred,
int32_t pred_stride,
uint8_t *dest, int32_t dest_stride)
{
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
v16i8 zero = { 0 };
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31 };
LD_SH2(input, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
UNPCK_SH_SW(input1, in2, in3);
VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
res0 = CLIP_SW_0_255(res0);
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred,
int32_t pred_stride,
uint8_t *dest, int32_t dest_stride)
{
v8i16 vec;
v8i16 res0, res1, res2, res3;
v16i8 zero = { 0 };
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
vec = __msa_fill_h(in_dc);
vec = __msa_srari_h(vec, 3);
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff)
{
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
LD_SH2(input, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
UNPCK_SH_SW(input1, in2, in3);
BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
SRA_4V(vt0, vt1, vt2, vt3, 3);
mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
}
static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride)
{
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
v8i16 in0, in1, in2, in3;
v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
v16i8 dest0, dest1, dest2, dest3;
v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
v2i64 zero = { 0 };
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31 };
LD_SH2(input, 8, input0, input1);
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1);
PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2);
PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3);
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h);
PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1);
UNPCK_SH_SW(mul0, hz0_w, hz1_w);
UNPCK_SH_SW(mul1, hz2_w, hz3_w);
TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w);
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
res0 = CLIP_SW_0_255(res0);
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride)
{
v16u8 dest0, dest1, dest2, dest3;
v8i16 in0, in1, in2, in3;
v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v8i16 res0, res1, res2, res3;
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
v16i8 zero = { 0 };
LD_SH4(input, 8, in0, in1, in2, in3);
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1,
mul0, mul1, mul2, mul3);
PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2);
PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3);
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
UNPCK_SH_SW(hz0, hz0r, hz0l);
UNPCK_SH_SW(hz1, hz1r, hz1l);
UNPCK_SH_SW(hz2, hz2r, hz2l);
UNPCK_SH_SW(hz3, hz3r, hz3l);
VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l);
SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3);
VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r);
SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3);
PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2,
vt3);
TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
res2, res3);
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
__asm__ __volatile__(
"sw $zero, 0(%[input]) \n\t"
"sw $zero, 4(%[input]) \n\t"
"sw $zero, 8(%[input]) \n\t"
"sw $zero, 12(%[input]) \n\t"
"sw $zero, 16(%[input]) \n\t"
"sw $zero, 20(%[input]) \n\t"
"sw $zero, 24(%[input]) \n\t"
"sw $zero, 28(%[input]) \n\t"
"sw $zero, 32(%[input]) \n\t"
"sw $zero, 36(%[input]) \n\t"
"sw $zero, 40(%[input]) \n\t"
"sw $zero, 44(%[input]) \n\t"
"sw $zero, 48(%[input]) \n\t"
"sw $zero, 52(%[input]) \n\t"
"sw $zero, 56(%[input]) \n\t"
"sw $zero, 60(%[input]) \n\t"::
[input] "r"(input)
);
}
static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride)
{
v8i16 input_dc0, input_dc1, vec;
v16u8 dest0, dest1, dest2, dest3;
v16i8 zero = { 0 };
v8i16 res0, res1, res2, res3;
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
SRARI_H2_SH(input_dc0, input_dc1, 3);
vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0);
input[0] = 0;
input[16] = 0;
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0,
res1, res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
res2, res3);
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
void vp8_short_idct4x4llm_msa(int16_t *input, uint8_t *pred_ptr,
int32_t pred_stride, uint8_t *dst_ptr,
int32_t dst_stride)
{
idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, dst_stride);
}
void vp8_dc_only_idct_add_msa(int16_t input_dc, uint8_t *pred_ptr,
int32_t pred_stride, uint8_t *dst_ptr,
int32_t dst_stride)
{
idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride);
}
void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC)
{
v8i16 dqc0, dqc1, q0, q1, dq0, dq1;
LD_SH2(DQC, 8, dqc0, dqc1);
LD_SH2(d->qcoeff, 8, q0, q1);
MUL2(dqc0, q0, dqc1, q1, dq0, dq1);
ST_SH2(dq0, dq1, d->dqcoeff, 8);
}
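
/* Full dequant + IDCT + add for a single block; the inline assembly below
   zeroes the block's 16 coefficients afterwards. */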
void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq,
uint8_t *dest, int32_t stride)
{
dequant_idct4x4_addblk_msa(input, dq, dest, stride);
__asm__ __volatile__ (
"sw $zero, 0(%[input]) \n\t"
"sw $zero, 4(%[input]) \n\t"
"sw $zero, 8(%[input]) \n\t"
"sw $zero, 12(%[input]) \n\t"
"sw $zero, 16(%[input]) \n\t"
"sw $zero, 20(%[input]) \n\t"
"sw $zero, 24(%[input]) \n\t"
"sw $zero, 28(%[input]) \n\t"
:
: [input] "r" (input)
);
}
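
/* eobs carries one end-of-block count per 4x4 block; counts are read in
   pairs as 16-bit values so two blocks are dispatched together. If any
   count in a pair exceeds 1 (pair & 0xfefe), the full IDCT path is taken,
   otherwise the DC-only shortcut suffices. */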
void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq,
uint8_t *dst, int32_t stride,
char *eobs)
{
int16_t *eobs_h = (int16_t *)eobs;
uint8_t i;
for (i = 4; i--;)
{
if (eobs_h[0])
{
if (eobs_h[0] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dst, stride);
}
}
q += 32;
if (eobs_h[1])
{
if (eobs_h[1] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride);
}
}
q += 32;
dst += (4 * stride);
eobs_h += 2;
}
}
void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq,
uint8_t *dstu, uint8_t *dstv,
int32_t stride, char *eobs)
{
int16_t *eobs_h = (int16_t *)eobs;
if (eobs_h[0])
{
if (eobs_h[0] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
}
}
q += 32;
dstu += (stride * 4);
if (eobs_h[1])
{
if (eobs_h[1] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
}
}
q += 32;
if (eobs_h[2])
{
if (eobs_h[2] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
}
}
q += 32;
dstv += (stride * 4);
if (eobs_h[3])
{
if (eobs_h[3] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
}
}
}


@ -1,826 +0,0 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#define VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) \
{ \
v16u8 p1_a_sub_q1, p0_a_sub_q0; \
\
p0_a_sub_q0 = __msa_asub_u_b(p0, q0); \
p1_a_sub_q1 = __msa_asub_u_b(p1, q1); \
p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1); \
p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0); \
mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1); \
mask = ((v16u8)mask <= b_limit); \
}
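
/* Normal 4-tap edge filter: a clamped filter value built from p1 - q1 and
   3 * (q0 - p0) adjusts p0/q0 via (filt + 4) >> 3 and (filt + 3) >> 3, and
   a rounded half of it adjusts p1/q1 where hev is not set. Pixels are
   XORed with 0x80 to work in signed arithmetic. */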
#define VP8_LPF_FILTER4_4W(p1_in_out, p0_in_out, q0_in_out, q1_in_out, \
mask_in, hev_in) \
{ \
v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
\
p1_m = (v16i8)__msa_xori_b(p1_in_out, 0x80); \
p0_m = (v16i8)__msa_xori_b(p0_in_out, 0x80); \
q0_m = (v16i8)__msa_xori_b(q0_in_out, 0x80); \
q1_m = (v16i8)__msa_xori_b(q1_in_out, 0x80); \
\
filt = __msa_subs_s_b(p1_m, q1_m); \
\
filt = filt & (v16i8)hev_in; \
\
q0_sub_p0 = q0_m - p0_m; \
filt_sign = __msa_clti_s_b(filt, 0); \
\
cnst3h = __msa_ldi_h(3); \
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
filt_r += q0_sub_p0_r; \
filt_r = __msa_sat_s_h(filt_r, 7); \
\
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
filt_l += q0_sub_p0_l; \
filt_l = __msa_sat_s_h(filt_l, 7); \
\
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
filt = filt & (v16i8)mask_in; \
\
cnst4b = __msa_ldi_b(4); \
filt1 = __msa_adds_s_b(filt, cnst4b); \
filt1 >>= 3; \
\
cnst3b = __msa_ldi_b(3); \
filt2 = __msa_adds_s_b(filt, cnst3b); \
filt2 >>= 3; \
\
q0_m = __msa_subs_s_b(q0_m, filt1); \
q0_in_out = __msa_xori_b((v16u8)q0_m, 0x80); \
p0_m = __msa_adds_s_b(p0_m, filt2); \
p0_in_out = __msa_xori_b((v16u8)p0_m, 0x80); \
\
filt = __msa_srari_b(filt1, 1); \
hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
filt = filt & (v16i8)hev_in; \
\
q1_m = __msa_subs_s_b(q1_m, filt); \
q1_in_out = __msa_xori_b((v16u8)q1_m, 0x80); \
p1_m = __msa_adds_s_b(p1_m, filt); \
p1_in_out = __msa_xori_b((v16u8)p1_m, 0x80); \
}
#define VP8_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) \
{ \
v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, q0_sub_p0_sign; \
v16i8 filt, filt1, filt2, cnst4b, cnst3b, filt_sign; \
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
\
p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
\
filt = __msa_subs_s_b(p1_m, q1_m); \
\
q0_sub_p0 = q0_m - p0_m; \
filt_sign = __msa_clti_s_b(filt, 0); \
\
cnst3h = __msa_ldi_h(3); \
q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
q0_sub_p0_r *= cnst3h; \
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
filt_r += q0_sub_p0_r; \
filt_r = __msa_sat_s_h(filt_r, 7); \
\
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
q0_sub_p0_l *= cnst3h; \
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
filt_l += q0_sub_p0_l; \
filt_l = __msa_sat_s_h(filt_l, 7); \
\
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
filt = filt & (v16i8)(mask); \
\
cnst4b = __msa_ldi_b(4); \
filt1 = __msa_adds_s_b(filt, cnst4b); \
filt1 >>= 3; \
\
cnst3b = __msa_ldi_b(3); \
filt2 = __msa_adds_s_b(filt, cnst3b); \
filt2 >>= 3; \
\
q0_m = __msa_subs_s_b(q0_m, filt1); \
p0_m = __msa_adds_s_b(p0_m, filt2); \
q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \
p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \
}
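
/* Macroblock edge filter: high-edge-variance taps get the standard 4-tap
   adjustment; elsewhere a common filter value u = (filt * w + 63) >> 7 is
   applied to the three pixel pairs with weights w = 27, 18 and 9. */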
#define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
{ \
v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
v16i8 filt, q0_sub_p0, cnst4b, cnst3b; \
v16i8 u, filt1, filt2, filt_sign, q0_sub_p0_sign; \
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_r, u_r, u_l, filt_l; \
v8i16 cnst3h, cnst27h, cnst18h, cnst63h; \
\
cnst3h = __msa_ldi_h(3); \
\
p2_m = (v16i8)__msa_xori_b(p2, 0x80); \
p1_m = (v16i8)__msa_xori_b(p1, 0x80); \
p0_m = (v16i8)__msa_xori_b(p0, 0x80); \
q0_m = (v16i8)__msa_xori_b(q0, 0x80); \
q1_m = (v16i8)__msa_xori_b(q1, 0x80); \
q2_m = (v16i8)__msa_xori_b(q2, 0x80); \
\
filt = __msa_subs_s_b(p1_m, q1_m); \
q0_sub_p0 = q0_m - p0_m; \
q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
filt_sign = __msa_clti_s_b(filt, 0); \
\
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
q0_sub_p0_r *= cnst3h; \
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
filt_r = filt_r + q0_sub_p0_r; \
filt_r = __msa_sat_s_h(filt_r, 7); \
\
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
q0_sub_p0_l *= cnst3h; \
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
filt_l = filt_l + q0_sub_p0_l; \
filt_l = __msa_sat_s_h(filt_l, 7); \
\
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
filt = filt & (v16i8)mask; \
filt2 = filt & (v16i8)hev; \
\
hev = __msa_xori_b(hev, 0xff); \
filt = filt & (v16i8)hev; \
cnst4b = __msa_ldi_b(4); \
filt1 = __msa_adds_s_b(filt2, cnst4b); \
filt1 >>= 3; \
cnst3b = __msa_ldi_b(3); \
filt2 = __msa_adds_s_b(filt2, cnst3b); \
filt2 >>= 3; \
q0_m = __msa_subs_s_b(q0_m, filt1); \
p0_m = __msa_adds_s_b(p0_m, filt2); \
\
filt_sign = __msa_clti_s_b(filt, 0); \
ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
\
cnst27h = __msa_ldi_h(27); \
cnst63h = __msa_ldi_h(63); \
\
u_r = filt_r * cnst27h; \
u_r += cnst63h; \
u_r >>= 7; \
u_r = __msa_sat_s_h(u_r, 7); \
u_l = filt_l * cnst27h; \
u_l += cnst63h; \
u_l >>= 7; \
u_l = __msa_sat_s_h(u_l, 7); \
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
q0_m = __msa_subs_s_b(q0_m, u); \
q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
p0_m = __msa_adds_s_b(p0_m, u); \
p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
cnst18h = __msa_ldi_h(18); \
u_r = filt_r * cnst18h; \
u_r += cnst63h; \
u_r >>= 7; \
u_r = __msa_sat_s_h(u_r, 7); \
\
u_l = filt_l * cnst18h; \
u_l += cnst63h; \
u_l >>= 7; \
u_l = __msa_sat_s_h(u_l, 7); \
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
q1_m = __msa_subs_s_b(q1_m, u); \
q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
p1_m = __msa_adds_s_b(p1_m, u); \
p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
u_r = filt_r << 3; \
u_r += filt_r + cnst63h; \
u_r >>= 7; \
u_r = __msa_sat_s_h(u_r, 7); \
\
u_l = filt_l << 3; \
u_l += filt_l + cnst63h; \
u_l >>= 7; \
u_l = __msa_sat_s_h(u_l, 7); \
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
q2_m = __msa_subs_s_b(q2_m, u); \
q2 = __msa_xori_b((v16u8)q2_m, 0x80); \
p2_m = __msa_adds_s_b(p2_m, u); \
p2 = __msa_xori_b((v16u8)p2_m, 0x80); \
}
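
/* Builds the filter mask from neighbour differences against limit/b_limit
   and the high-edge-variance flag from |p1 - p0| and |q1 - q0| against
   thresh. */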
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, \
q0_in, q1_in, q2_in, q3_in, \
limit_in, b_limit_in, thresh_in, \
hev_out, mask_out, flat_out) \
{ \
v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
\
p3_asub_p2_m = __msa_asub_u_b((p3_in), (p2_in)); \
p2_asub_p1_m = __msa_asub_u_b((p2_in), (p1_in)); \
p1_asub_p0_m = __msa_asub_u_b((p1_in), (p0_in)); \
q1_asub_q0_m = __msa_asub_u_b((q1_in), (q0_in)); \
q2_asub_q1_m = __msa_asub_u_b((q2_in), (q1_in)); \
q3_asub_q2_m = __msa_asub_u_b((q3_in), (q2_in)); \
p0_asub_q0_m = __msa_asub_u_b((p0_in), (q0_in)); \
p1_asub_q1_m = __msa_asub_u_b((p1_in), (q1_in)); \
flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
hev_out = (thresh_in) < (v16u8)flat_out; \
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
p1_asub_q1_m >>= 1; \
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
mask_out = (b_limit_in) < p0_asub_q0_m; \
mask_out = __msa_max_u_b(flat_out, mask_out); \
p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
mask_out = (limit_in) < (v16u8)mask_out; \
mask_out = __msa_xori_b(mask_out, 0xff); \
}
#define VP8_ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) \
{ \
uint16_t tmp0_h; \
uint32_t tmp0_w; \
\
tmp0_w = __msa_copy_u_w((v4i32)in0, in0_idx); \
tmp0_h = __msa_copy_u_h((v8i16)in1, in1_idx); \
SW(tmp0_w, pdst); \
SH(tmp0_h, pdst + stride); \
}
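
/* Filters two 8-pixel edges in one 16-lane pass; the per-edge thresholds
   are splatted and interleaved into the two halves of each vector. */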
static void loop_filter_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
const uint8_t *b_limit1_ptr,
const uint8_t *limit1_ptr,
const uint8_t *thresh1_ptr)
{
v16u8 mask, hev, flat;
v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
hev, mask, flat);
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
}
static void loop_filter_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
const uint8_t *b_limit1_ptr,
const uint8_t *limit1_ptr,
const uint8_t *thresh1_ptr)
{
v16u8 mask, hev, flat;
v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
LD_UB8(src - 4 + (8 * pitch), pitch,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
hev, mask, flat);
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
src -= 2;
ST4x8_UB(tmp2, tmp3, src, pitch);
src += (8 * pitch);
ST4x8_UB(tmp4, tmp5, src, pitch);
}
static void mbloop_filter_horizontal_edge_y_msa(uint8_t *src, int32_t pitch,
const uint8_t b_limit_in,
const uint8_t limit_in,
const uint8_t thresh_in)
{
uint8_t *temp_src;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev, flat, thresh, limit, b_limit;
b_limit = (v16u8)__msa_fill_b(b_limit_in);
limit = (v16u8)__msa_fill_b(limit_in);
thresh = (v16u8)__msa_fill_b(thresh_in);
temp_src = src - (pitch << 2);
LD_UB8(temp_src, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
temp_src = src - 3 * pitch;
ST_UB4(p2, p1, p0, q0, temp_src, pitch);
temp_src += (4 * pitch);
ST_UB2(q1, q2, temp_src, pitch);
}
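
/* U and V rows are packed into the two halves of each vector so both
   chroma planes are filtered together; results are written back as 64-bit
   halves. */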
static void mbloop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
int32_t pitch,
const uint8_t b_limit_in,
const uint8_t limit_in,
const uint8_t thresh_in)
{
uint8_t *temp_src;
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev, flat, thresh, limit, b_limit;
v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
b_limit = (v16u8)__msa_fill_b(b_limit_in);
limit = (v16u8)__msa_fill_b(limit_in);
thresh = (v16u8)__msa_fill_b(thresh_in);
temp_src = src_u - (pitch << 2);
LD_UB8(temp_src, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
temp_src = src_v - (pitch << 2);
LD_UB8(temp_src, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
p2_d = __msa_copy_u_d((v2i64)p2, 0);
p1_d = __msa_copy_u_d((v2i64)p1, 0);
p0_d = __msa_copy_u_d((v2i64)p0, 0);
q0_d = __msa_copy_u_d((v2i64)q0, 0);
q1_d = __msa_copy_u_d((v2i64)q1, 0);
q2_d = __msa_copy_u_d((v2i64)q2, 0);
src_u -= (pitch * 3);
SD4(p2_d, p1_d, p0_d, q0_d, src_u, pitch);
src_u += 4 * pitch;
SD(q1_d, src_u);
src_u += pitch;
SD(q2_d, src_u);
p2_d = __msa_copy_u_d((v2i64)p2, 1);
p1_d = __msa_copy_u_d((v2i64)p1, 1);
p0_d = __msa_copy_u_d((v2i64)p0, 1);
q0_d = __msa_copy_u_d((v2i64)q0, 1);
q1_d = __msa_copy_u_d((v2i64)q1, 1);
q2_d = __msa_copy_u_d((v2i64)q2, 1);
src_v -= (pitch * 3);
SD4(p2_d, p1_d, p0_d, q0_d, src_v, pitch);
src_v += 4 * pitch;
SD(q1_d, src_v);
src_v += pitch;
SD(q2_d, src_v);
}
static void mbloop_filter_vertical_edge_y_msa(uint8_t *src, int32_t pitch,
const uint8_t b_limit_in,
const uint8_t limit_in,
const uint8_t thresh_in)
{
uint8_t *temp_src;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev, flat, thresh, limit, b_limit;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
v16u8 row9, row10, row11, row12, row13, row14, row15;
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
b_limit = (v16u8)__msa_fill_b(b_limit_in);
limit = (v16u8)__msa_fill_b(limit_in);
thresh = (v16u8)__msa_fill_b(thresh_in);
temp_src = src - 4;
LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
temp_src += (8 * pitch);
LD_UB8(temp_src, pitch,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7);
ILVRL_B2_SH(q2, q1, tmp2, tmp5);
temp_src = src - 3;
VP8_ST6x1_UB(tmp3, 0, tmp2, 0, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp3, 1, tmp2, 1, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp3, 2, tmp2, 2, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp3, 3, tmp2, 3, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp4, 0, tmp2, 4, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp4, 1, tmp2, 5, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp4, 2, tmp2, 6, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp4, 3, tmp2, 7, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp6, 0, tmp5, 0, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp6, 1, tmp5, 1, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp6, 2, tmp5, 2, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp6, 3, tmp5, 3, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp7, 0, tmp5, 4, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp7, 1, tmp5, 5, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp7, 2, tmp5, 6, temp_src, 4);
temp_src += pitch;
VP8_ST6x1_UB(tmp7, 3, tmp5, 7, temp_src, 4);
}
static void mbloop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
int32_t pitch,
const uint8_t b_limit_in,
const uint8_t limit_in,
const uint8_t thresh_in)
{
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev, flat, thresh, limit, b_limit;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
v16u8 row9, row10, row11, row12, row13, row14, row15;
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
b_limit = (v16u8)__msa_fill_b(b_limit_in);
limit = (v16u8)__msa_fill_b(limit_in);
thresh = (v16u8)__msa_fill_b(thresh_in);
LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
LD_UB8(src_v - 4, pitch,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7);
ILVRL_B2_SH(q2, q1, tmp2, tmp5);
src_u -= 3;
VP8_ST6x1_UB(tmp3, 0, tmp2, 0, src_u, 4);
src_u += pitch;
VP8_ST6x1_UB(tmp3, 1, tmp2, 1, src_u, 4);
src_u += pitch;
VP8_ST6x1_UB(tmp3, 2, tmp2, 2, src_u, 4);
src_u += pitch;
VP8_ST6x1_UB(tmp3, 3, tmp2, 3, src_u, 4);
src_u += pitch;
VP8_ST6x1_UB(tmp4, 0, tmp2, 4, src_u, 4);
src_u += pitch;
VP8_ST6x1_UB(tmp4, 1, tmp2, 5, src_u, 4);
src_u += pitch;
VP8_ST6x1_UB(tmp4, 2, tmp2, 6, src_u, 4);
src_u += pitch;
VP8_ST6x1_UB(tmp4, 3, tmp2, 7, src_u, 4);
src_v -= 3;
VP8_ST6x1_UB(tmp6, 0, tmp5, 0, src_v, 4);
src_v += pitch;
VP8_ST6x1_UB(tmp6, 1, tmp5, 1, src_v, 4);
src_v += pitch;
VP8_ST6x1_UB(tmp6, 2, tmp5, 2, src_v, 4);
src_v += pitch;
VP8_ST6x1_UB(tmp6, 3, tmp5, 3, src_v, 4);
src_v += pitch;
VP8_ST6x1_UB(tmp7, 0, tmp5, 4, src_v, 4);
src_v += pitch;
VP8_ST6x1_UB(tmp7, 1, tmp5, 5, src_v, 4);
src_v += pitch;
VP8_ST6x1_UB(tmp7, 2, tmp5, 6, src_v, 4);
src_v += pitch;
VP8_ST6x1_UB(tmp7, 3, tmp5, 7, src_v, 4);
}
void vp8_loop_filter_simple_horizontal_edge_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr)
{
v16u8 p1, p0, q1, q0;
v16u8 mask, b_limit;
b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
LD_UB4(src - (pitch << 1), pitch, p1, p0, q0, q1);
VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
ST_UB2(p0, q0, (src - pitch), pitch);
}
void vp8_loop_filter_simple_vertical_edge_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr)
{
uint8_t *temp_src;
v16u8 p1, p0, q1, q0;
v16u8 mask, b_limit;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
v16u8 row9, row10, row11, row12, row13, row14, row15;
v8i16 tmp0, tmp1;
b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
temp_src = src - 2;
LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
temp_src += (8 * pitch);
LD_UB8(temp_src, pitch,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p1, p0, q0, q1);
VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
ILVRL_B2_SH(q0, p0, tmp1, tmp0);
src -= 1;
ST2x4_UB(tmp1, 0, src, pitch);
src += 4 * pitch;
ST2x4_UB(tmp1, 4, src, pitch);
src += 4 * pitch;
ST2x4_UB(tmp0, 0, src, pitch);
src += 4 * pitch;
ST2x4_UB(tmp0, 4, src, pitch);
src += 4 * pitch;
}
static void loop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
int32_t pitch,
const uint8_t b_limit_in,
const uint8_t limit_in,
const uint8_t thresh_in)
{
uint64_t p1_d, p0_d, q0_d, q1_d;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev, flat, thresh, limit, b_limit;
v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
thresh = (v16u8)__msa_fill_b(thresh_in);
limit = (v16u8)__msa_fill_b(limit_in);
b_limit = (v16u8)__msa_fill_b(b_limit_in);
src_u = src_u - (pitch << 2);
LD_UB8(src_u, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
src_u += (5 * pitch);
src_v = src_v - (pitch << 2);
LD_UB8(src_v, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
src_v += (5 * pitch);
    /* The right 8 elements of p3 are u pixels and
       the left 8 elements are v pixels. */
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
p1_d = __msa_copy_u_d((v2i64)p1, 0);
p0_d = __msa_copy_u_d((v2i64)p0, 0);
q0_d = __msa_copy_u_d((v2i64)q0, 0);
q1_d = __msa_copy_u_d((v2i64)q1, 0);
SD4(q1_d, q0_d, p0_d, p1_d, src_u, (- pitch));
p1_d = __msa_copy_u_d((v2i64)p1, 1);
p0_d = __msa_copy_u_d((v2i64)p0, 1);
q0_d = __msa_copy_u_d((v2i64)q0, 1);
q1_d = __msa_copy_u_d((v2i64)q1, 1);
SD4(q1_d, q0_d, p0_d, p1_d, src_v, (- pitch));
}
static void loop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
int32_t pitch,
const uint8_t b_limit_in,
const uint8_t limit_in,
const uint8_t thresh_in)
{
uint8_t *temp_src_u, *temp_src_v;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev, flat, thresh, limit, b_limit;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
v16u8 row9, row10, row11, row12, row13, row14, row15;
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
thresh = (v16u8)__msa_fill_b(thresh_in);
limit = (v16u8)__msa_fill_b(limit_in);
b_limit = (v16u8)__msa_fill_b(b_limit_in);
LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
LD_UB8(src_v - 4, pitch,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
tmp0 = (v4i32)__msa_ilvl_b((v16i8)p0, (v16i8)p1);
tmp1 = (v4i32)__msa_ilvl_b((v16i8)q1, (v16i8)q0);
ILVRL_H2_SW(tmp1, tmp0, tmp4, tmp5);
temp_src_u = src_u - 2;
ST4x4_UB(tmp2, tmp2, 0, 1, 2, 3, temp_src_u, pitch);
temp_src_u += 4 * pitch;
ST4x4_UB(tmp3, tmp3, 0, 1, 2, 3, temp_src_u, pitch);
temp_src_v = src_v - 2;
ST4x4_UB(tmp4, tmp4, 0, 1, 2, 3, temp_src_v, pitch);
temp_src_v += 4 * pitch;
ST4x4_UB(tmp5, tmp5, 0, 1, 2, 3, temp_src_v, pitch);
}
void vp8_loop_filter_mbh_msa(uint8_t *src_y, uint8_t *src_u,
uint8_t *src_v, int32_t pitch_y,
int32_t pitch_u_v,
loop_filter_info *lpf_info_ptr)
{
mbloop_filter_horizontal_edge_y_msa(src_y, pitch_y,
*lpf_info_ptr->mblim,
*lpf_info_ptr->lim,
*lpf_info_ptr->hev_thr);
if (src_u)
{
mbloop_filter_horizontal_edge_uv_msa(src_u, src_v, pitch_u_v,
*lpf_info_ptr->mblim,
*lpf_info_ptr->lim,
*lpf_info_ptr->hev_thr);
}
}
void vp8_loop_filter_mbv_msa(uint8_t *src_y, uint8_t *src_u,
uint8_t *src_v, int32_t pitch_y,
int32_t pitch_u_v,
loop_filter_info *lpf_info_ptr)
{
mbloop_filter_vertical_edge_y_msa(src_y, pitch_y,
*lpf_info_ptr->mblim,
*lpf_info_ptr->lim,
*lpf_info_ptr->hev_thr);
if (src_u)
{
mbloop_filter_vertical_edge_uv_msa(src_u, src_v, pitch_u_v,
*lpf_info_ptr->mblim,
*lpf_info_ptr->lim,
*lpf_info_ptr->hev_thr);
}
}
void vp8_loop_filter_bh_msa(uint8_t *src_y, uint8_t *src_u,
uint8_t *src_v, int32_t pitch_y,
int32_t pitch_u_v,
loop_filter_info *lpf_info_ptr)
{
loop_filter_horizontal_4_dual_msa(src_y + 4 * pitch_y, pitch_y,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr);
loop_filter_horizontal_4_dual_msa(src_y + 8 * pitch_y, pitch_y,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr);
loop_filter_horizontal_4_dual_msa(src_y + 12 * pitch_y, pitch_y,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr);
if (src_u)
{
loop_filter_horizontal_edge_uv_msa(src_u + (4 * pitch_u_v),
src_v + (4 * pitch_u_v),
pitch_u_v,
*lpf_info_ptr->blim,
*lpf_info_ptr->lim,
*lpf_info_ptr->hev_thr);
}
}
void vp8_loop_filter_bv_msa(uint8_t *src_y, uint8_t *src_u,
uint8_t *src_v, int32_t pitch_y,
int32_t pitch_u_v,
loop_filter_info *lpf_info_ptr)
{
loop_filter_vertical_4_dual_msa(src_y + 4, pitch_y, lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr);
loop_filter_vertical_4_dual_msa(src_y + 8, pitch_y,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr);
loop_filter_vertical_4_dual_msa(src_y + 12, pitch_y,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr,
lpf_info_ptr->blim,
lpf_info_ptr->lim,
lpf_info_ptr->hev_thr);
if (src_u)
{
loop_filter_vertical_edge_uv_msa(src_u + 4, src_v + 4, pitch_u_v,
*lpf_info_ptr->blim,
*lpf_info_ptr->lim,
*lpf_info_ptr->hev_thr);
}
}
void vp8_loop_filter_bhs_msa(uint8_t *src_y, int32_t pitch_y,
const uint8_t *b_limit_ptr)
{
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (4 * pitch_y),
pitch_y, b_limit_ptr);
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (8 * pitch_y),
pitch_y, b_limit_ptr);
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (12 * pitch_y),
pitch_y, b_limit_ptr);
}
void vp8_loop_filter_bvs_msa(uint8_t *src_y, int32_t pitch_y,
const uint8_t *b_limit_ptr)
{
vp8_loop_filter_simple_vertical_edge_msa(src_y + 4, pitch_y, b_limit_ptr);
vp8_loop_filter_simple_vertical_edge_msa(src_y + 8, pitch_y, b_limit_ptr);
vp8_loop_filter_simple_vertical_edge_msa(src_y + 12, pitch_y, b_limit_ptr);
}


@ -1,146 +0,0 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vp8/common/postproc.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static void filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
uint64_t src0_d, src1_d, dst0_d, dst1_d;
v16i8 src0 = { 0 };
v16i8 src1 = { 0 };
v16i8 dst0 = { 0 };
v16i8 dst1 = { 0 };
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
for (row = 2; row--;)
{
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src0);
INSERT_D2_SB(dst0_d, dst1_d, dst0);
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src1);
INSERT_D2_SB(dst0_d, dst1_d, dst1);
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst0, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst1, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
}
}
static void filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
v16i8 src0, src1, src2, src3;
v16i8 dst0, dst1, dst2, dst3;
v8i16 src_wt, dst_wt;
v8i16 res_h_r, res_h_l;
v8i16 src_r, src_l, dst_r, dst_l;
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
for (row = 4; row--;)
{
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
src_ptr += (4 * src_stride);
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src2, src_r, src_l);
UNPCK_UB_SH(dst2, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src3, src_r, src_l);
UNPCK_UB_SH(dst3, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
}
}
void vp8_filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride,
src_weight);
}
void vp8_filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride,
src_weight);
}


@ -1,851 +0,0 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static const int16_t vp8_rv_msa[] =
{
8, 5, 2, 2, 8, 12, 4, 9, 8, 3,
0, 3, 9, 0, 0, 0, 8, 3, 14, 4,
10, 1, 11, 14, 1, 14, 9, 6, 12, 11,
8, 6, 10, 0, 0, 8, 9, 0, 3, 14,
8, 11, 13, 4, 2, 9, 0, 3, 9, 6,
1, 2, 3, 14, 13, 1, 8, 2, 9, 7,
3, 3, 1, 13, 13, 6, 6, 5, 2, 7,
11, 9, 11, 8, 7, 3, 2, 0, 13, 13,
14, 4, 12, 5, 12, 10, 8, 10, 13, 10,
4, 14, 4, 10, 0, 8, 11, 1, 13, 7,
7, 14, 6, 14, 13, 2, 13, 5, 4, 4,
0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
3, 8, 3, 7, 8, 5, 11, 4, 12, 3,
11, 9, 14, 8, 14, 13, 4, 3, 1, 2,
14, 6, 5, 4, 4, 11, 4, 6, 2, 1,
5, 8, 8, 12, 13, 5, 14, 10, 12, 13,
0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
};
#define VP8_TRANSPOSE8x16_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3, \
out4, out5, out6, out7, \
out8, out9, out10, out11, \
out12, out13, out14, out15) \
{ \
v8i16 temp0, temp1, temp2, temp3, temp4; \
v8i16 temp5, temp6, temp7, temp8, temp9; \
\
ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, \
temp0, temp1, temp2, temp3); \
ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
ILVRL_W2_SH(temp5, temp4, temp6, temp7); \
ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
ILVRL_W2_SH(temp5, temp4, temp8, temp9); \
ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, \
temp0, temp1, temp2, temp3); \
ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
ILVRL_W2_UB(temp5, temp4, out8, out10); \
ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
ILVRL_W2_UB(temp5, temp4, out12, out14); \
out0 = (v16u8)temp6; \
out2 = (v16u8)temp7; \
out4 = (v16u8)temp8; \
out6 = (v16u8)temp9; \
out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \
out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \
out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \
out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \
out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \
out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \
}
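
/* Weighted average of src with its four vertical neighbours, kept only
   where every neighbour differs from src by less than ref; otherwise the
   original pixel is retained. */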
#define VP8_AVER_IF_RETAIN(above2_in, above1_in, src_in, \
below1_in, below2_in, ref, out) \
{ \
v16u8 temp0, temp1; \
\
temp1 = __msa_aver_u_b(above2_in, above1_in); \
temp0 = __msa_aver_u_b(below2_in, below1_in); \
temp1 = __msa_aver_u_b(temp1, temp0); \
out = __msa_aver_u_b(src_in, temp1); \
temp0 = __msa_asub_u_b(src_in, above2_in); \
temp1 = __msa_asub_u_b(src_in, above1_in); \
temp0 = (temp0 < ref); \
temp1 = (temp1 < ref); \
temp0 = temp0 & temp1; \
temp1 = __msa_asub_u_b(src_in, below1_in); \
temp1 = (temp1 < ref); \
temp0 = temp0 & temp1; \
temp1 = __msa_asub_u_b(src_in, below2_in); \
temp1 = (temp1 < ref); \
temp0 = temp0 & temp1; \
out = __msa_bmz_v(out, src_in, temp0); \
}
#define TRANSPOSE12x16_B(in0, in1, in2, in3, in4, in5, in6, in7, \
in8, in9, in10, in11, in12, in13, in14, in15) \
{ \
v8i16 temp0, temp1, temp2, temp3, temp4; \
v8i16 temp5, temp6, temp7, temp8, temp9; \
\
ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1); \
ILVRL_H2_SH(temp1, temp0, temp2, temp3); \
ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1); \
ILVRL_H2_SH(temp1, temp0, temp4, temp5); \
ILVRL_W2_SH(temp4, temp2, temp0, temp1); \
ILVRL_W2_SH(temp5, temp3, temp2, temp3); \
    ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5);                    \
ILVRL_H2_SH(temp5, temp4, temp6, temp7); \
ILVR_B2_SH(in13, in12, in15, in14, temp4, temp5); \
ILVRL_H2_SH(temp5, temp4, temp8, temp9); \
ILVRL_W2_SH(temp8, temp6, temp4, temp5); \
ILVRL_W2_SH(temp9, temp7, temp6, temp7); \
ILVL_B2_SH(in1, in0, in3, in2, temp8, temp9); \
ILVR_D2_UB(temp4, temp0, temp5, temp1, in0, in2); \
in1 = (v16u8)__msa_ilvl_d((v2i64)temp4, (v2i64)temp0); \
in3 = (v16u8)__msa_ilvl_d((v2i64)temp5, (v2i64)temp1); \
ILVL_B2_SH(in5, in4, in7, in6, temp0, temp1); \
ILVR_D2_UB(temp6, temp2, temp7, temp3, in4, in6); \
in5 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp2); \
in7 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp3); \
ILVL_B4_SH(in9, in8, in11, in10, in13, in12, in15, in14, \
temp2, temp3, temp4, temp5); \
ILVR_H4_SH(temp9, temp8, temp1, temp0, temp3, temp2, temp5, temp4, \
temp6, temp7, temp8, temp9); \
ILVR_W2_SH(temp7, temp6, temp9, temp8, temp0, temp1); \
in8 = (v16u8)__msa_ilvr_d((v2i64)temp1, (v2i64)temp0); \
in9 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp0); \
ILVL_W2_SH(temp7, temp6, temp9, temp8, temp2, temp3); \
in10 = (v16u8)__msa_ilvr_d((v2i64)temp3, (v2i64)temp2); \
in11 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp2); \
}
#define VP8_TRANSPOSE12x8_UB_UB(in0, in1, in2, in3, in4, in5, \
in6, in7, in8, in9, in10, in11) \
{ \
v8i16 temp0, temp1, temp2, temp3; \
v8i16 temp4, temp5, temp6, temp7; \
\
ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1); \
ILVRL_H2_SH(temp1, temp0, temp2, temp3); \
ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1); \
ILVRL_H2_SH(temp1, temp0, temp4, temp5); \
ILVRL_W2_SH(temp4, temp2, temp0, temp1); \
ILVRL_W2_SH(temp5, temp3, temp2, temp3); \
ILVL_B2_SH(in1, in0, in3, in2, temp4, temp5); \
temp4 = __msa_ilvr_h(temp5, temp4); \
ILVL_B2_SH(in5, in4, in7, in6, temp6, temp7); \
temp5 = __msa_ilvr_h(temp7, temp6); \
ILVRL_W2_SH(temp5, temp4, temp6, temp7); \
in0 = (v16u8)temp0; \
in2 = (v16u8)temp1; \
in4 = (v16u8)temp2; \
in6 = (v16u8)temp3; \
in8 = (v16u8)temp6; \
in10 = (v16u8)temp7; \
in1 = (v16u8)__msa_ilvl_d((v2i64)temp0, (v2i64)temp0); \
in3 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp1); \
in5 = (v16u8)__msa_ilvl_d((v2i64)temp2, (v2i64)temp2); \
in7 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp3); \
in9 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp6); \
in11 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp7); \
}
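
/* Two-pass deblock of an 8-row chroma strip: a vertical (down) pass over
   16-column groups, then a horizontal (across) pass that transposes
   8-column groups, filters them and transposes back. */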
static void postproc_down_across_chroma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
int32_t src_stride,
int32_t dst_stride,
int32_t cols, uint8_t *f)
{
uint8_t *p_src = src_ptr;
uint8_t *p_dst = dst_ptr;
uint8_t *f_orig = f;
uint8_t *p_dst_st = dst_ptr;
uint16_t col;
uint64_t out0, out1, out2, out3;
v16u8 above2, above1, below2, below1, src, ref, ref_temp;
v16u8 inter0, inter1, inter2, inter3, inter4, inter5;
v16u8 inter6, inter7, inter8, inter9, inter10, inter11;
for (col = (cols / 16); col--;)
{
ref = LD_UB(f);
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
src = LD_UB(p_src);
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
above2 = LD_UB(p_src + 3 * src_stride);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
above1 = LD_UB(p_src + 4 * src_stride);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
src = LD_UB(p_src + 5 * src_stride);
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
below1 = LD_UB(p_src + 6 * src_stride);
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
below2 = LD_UB(p_src + 7 * src_stride);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
above2 = LD_UB(p_src + 8 * src_stride);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
above1 = LD_UB(p_src + 9 * src_stride);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
p_dst, dst_stride);
p_dst += 16;
p_src += 16;
f += 16;
}
if (0 != (cols / 16))
{
ref = LD_UB(f);
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
src = LD_UB(p_src);
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
above2 = LD_UB(p_src + 3 * src_stride);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
above1 = LD_UB(p_src + 4 * src_stride);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
src = LD_UB(p_src + 5 * src_stride);
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
below1 = LD_UB(p_src + 6 * src_stride);
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
below2 = LD_UB(p_src + 7 * src_stride);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
above2 = LD_UB(p_src + 8 * src_stride);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
above1 = LD_UB(p_src + 9 * src_stride);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
out0 = __msa_copy_u_d((v2i64)inter0, 0);
out1 = __msa_copy_u_d((v2i64)inter1, 0);
out2 = __msa_copy_u_d((v2i64)inter2, 0);
out3 = __msa_copy_u_d((v2i64)inter3, 0);
SD4(out0, out1, out2, out3, p_dst, dst_stride);
out0 = __msa_copy_u_d((v2i64)inter4, 0);
out1 = __msa_copy_u_d((v2i64)inter5, 0);
out2 = __msa_copy_u_d((v2i64)inter6, 0);
out3 = __msa_copy_u_d((v2i64)inter7, 0);
SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride);
}
f = f_orig;
p_dst = dst_ptr - 2;
LD_UB8(p_dst, dst_stride,
inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7);
for (col = 0; col < (cols / 8); ++col)
{
ref = LD_UB(f);
f += 8;
VP8_TRANSPOSE12x8_UB_UB(inter0, inter1, inter2, inter3,
inter4, inter5, inter6, inter7,
inter8, inter9, inter10, inter11);
if (0 == col)
{
above2 = inter2;
above1 = inter2;
}
else
{
above2 = inter0;
above1 = inter1;
}
src = inter2;
below1 = inter3;
below2 = inter4;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
ref_temp, inter2);
above2 = inter5;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
ref_temp, inter3);
above1 = inter6;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
ref_temp, inter4);
src = inter7;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src,
ref_temp, inter5);
below1 = inter8;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1,
ref_temp, inter6);
below2 = inter9;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
ref_temp, inter7);
if (col == (cols / 8 - 1))
{
above2 = inter9;
}
else
{
above2 = inter10;
}
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
ref_temp, inter8);
if (col == (cols / 8 - 1))
{
above1 = inter9;
}
else
{
above1 = inter11;
}
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
ref_temp, inter9);
TRANSPOSE8x8_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7,
inter8, inter9, inter2, inter3, inter4, inter5,
inter6, inter7, inter8, inter9);
p_dst += 8;
LD_UB2(p_dst, dst_stride, inter0, inter1);
ST8x1_UB(inter2, p_dst_st);
ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
p_dst_st += 8;
}
}
static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
int32_t src_stride,
int32_t dst_stride,
int32_t cols, uint8_t *f)
{
uint8_t *p_src = src_ptr;
uint8_t *p_dst = dst_ptr;
uint8_t *p_dst_st = dst_ptr;
uint8_t *f_orig = f;
uint16_t col;
v16u8 above2, above1, below2, below1;
v16u8 src, ref, ref_temp;
v16u8 inter0, inter1, inter2, inter3, inter4, inter5, inter6;
v16u8 inter7, inter8, inter9, inter10, inter11;
v16u8 inter12, inter13, inter14, inter15;
for (col = (cols / 16); col--;)
{
ref = LD_UB(f);
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
src = LD_UB(p_src);
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
above2 = LD_UB(p_src + 3 * src_stride);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
above1 = LD_UB(p_src + 4 * src_stride);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
src = LD_UB(p_src + 5 * src_stride);
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
below1 = LD_UB(p_src + 6 * src_stride);
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
below2 = LD_UB(p_src + 7 * src_stride);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
above2 = LD_UB(p_src + 8 * src_stride);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
above1 = LD_UB(p_src + 9 * src_stride);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
src = LD_UB(p_src + 10 * src_stride);
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8);
below1 = LD_UB(p_src + 11 * src_stride);
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9);
below2 = LD_UB(p_src + 12 * src_stride);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10);
above2 = LD_UB(p_src + 13 * src_stride);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11);
above1 = LD_UB(p_src + 14 * src_stride);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12);
src = LD_UB(p_src + 15 * src_stride);
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13);
below1 = LD_UB(p_src + 16 * src_stride);
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14);
below2 = LD_UB(p_src + 17 * src_stride);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15);
ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
p_dst, dst_stride);
ST_UB8(inter8, inter9, inter10, inter11, inter12, inter13,
inter14, inter15, p_dst + 8 * dst_stride, dst_stride);
p_src += 16;
p_dst += 16;
f += 16;
}
f = f_orig;
p_dst = dst_ptr - 2;
LD_UB8(p_dst, dst_stride,
inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7);
LD_UB8(p_dst + 8 * dst_stride, dst_stride,
inter8, inter9, inter10, inter11, inter12, inter13,
inter14, inter15);
for (col = 0; col < cols / 8; ++col)
{
ref = LD_UB(f);
f += 8;
TRANSPOSE12x16_B(inter0, inter1, inter2, inter3, inter4, inter5,
inter6, inter7, inter8, inter9, inter10, inter11,
inter12, inter13, inter14, inter15);
if (0 == col)
{
above2 = inter2;
above1 = inter2;
}
else
{
above2 = inter0;
above1 = inter1;
}
src = inter2;
below1 = inter3;
below2 = inter4;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
ref_temp, inter2);
above2 = inter5;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
ref_temp, inter3);
above1 = inter6;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
ref_temp, inter4);
src = inter7;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
VP8_AVER_IF_RETAIN(below1, below2, above2, above1, src,
ref_temp, inter5);
below1 = inter8;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
VP8_AVER_IF_RETAIN(below2, above2, above1, src, below1,
ref_temp, inter6);
below2 = inter9;
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
VP8_AVER_IF_RETAIN(above2, above1, src, below1, below2,
ref_temp, inter7);
if (col == (cols / 8 - 1))
{
above2 = inter9;
}
else
{
above2 = inter10;
}
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
VP8_AVER_IF_RETAIN(above1, src, below1, below2, above2,
ref_temp, inter8);
if (col == (cols / 8 - 1))
{
above1 = inter9;
}
else
{
above1 = inter11;
}
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
VP8_AVER_IF_RETAIN(src, below1, below2, above2, above1,
ref_temp, inter9);
VP8_TRANSPOSE8x16_UB_UB(inter2, inter3, inter4, inter5,
inter6, inter7, inter8, inter9,
inter2, inter3, inter4, inter5,
inter6, inter7, inter8, inter9,
inter10, inter11, inter12, inter13,
inter14, inter15, above2, above1);
p_dst += 8;
LD_UB2(p_dst, dst_stride, inter0, inter1);
ST8x1_UB(inter2, p_dst_st);
ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
LD_UB2(p_dst + 8 * dst_stride, dst_stride, inter8, inter9);
ST8x1_UB(inter10, (p_dst_st + 8 * dst_stride));
ST8x1_UB(inter11, (p_dst_st + 9 * dst_stride));
LD_UB2(p_dst + 10 * dst_stride, dst_stride, inter10, inter11);
ST8x1_UB(inter12, (p_dst_st + 10 * dst_stride));
ST8x1_UB(inter13, (p_dst_st + 11 * dst_stride));
LD_UB2(p_dst + 12 * dst_stride, dst_stride, inter12, inter13);
ST8x1_UB(inter14, (p_dst_st + 12 * dst_stride));
ST8x1_UB(inter15, (p_dst_st + 13 * dst_stride));
LD_UB2(p_dst + 14 * dst_stride, dst_stride, inter14, inter15);
ST8x1_UB(above2, (p_dst_st + 14 * dst_stride));
ST8x1_UB(above1, (p_dst_st + 15 * dst_stride));
p_dst_st += 8;
}
}
void vp8_post_proc_down_and_across_mb_row_msa(uint8_t *src, uint8_t *dst,
int32_t src_stride,
int32_t dst_stride,
int32_t cols, uint8_t *f,
int32_t size)
{
if (8 == size)
{
postproc_down_across_chroma_msa(src, dst, src_stride, dst_stride,
cols, f);
}
else if (16 == size)
{
postproc_down_across_luma_msa(src, dst, src_stride, dst_stride,
cols, f);
}
}
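
/* Horizontal noise filter: keeps a running sum and sum of squares over a
   sliding 15-pixel window and replaces a pixel with the windowed average
   only where 15 * sum_sq - sum * sum stays below flimit. */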
void vp8_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch,
int32_t rows, int32_t cols, int32_t flimit)
{
int32_t row, col, cnt;
uint8_t *src_dup = src_ptr;
v16u8 src0, src, tmp_orig;
v16u8 tmp = { 0 };
v16i8 zero = { 0 };
v8u16 sum_h, src_r_h, src_l_h;
v4u32 src_r_w, src_l_w;
v4i32 flimit_vec;
flimit_vec = __msa_fill_w(flimit);
for (row = rows; row--;)
{
int32_t sum_sq = 0;
int32_t sum = 0;
src0 = (v16u8)__msa_fill_b(src_dup[0]);
ST8x1_UB(src0, (src_dup - 8));
src0 = (v16u8)__msa_fill_b(src_dup[cols - 1]);
ST_UB(src0, src_dup + cols);
src_dup[cols + 16] = src_dup[cols - 1];
tmp_orig = (v16u8)__msa_ldi_b(0);
tmp_orig[15] = tmp[15];
src = LD_UB(src_dup - 8);
src[15] = 0;
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
sum_sq = HADD_SW_S32(src_r_w);
sum_sq += HADD_SW_S32(src_l_w);
sum_h = __msa_hadd_u_h(src, src);
sum = HADD_UH_U32(sum_h);
{
v16u8 src7, src8, src_r, src_l;
v16i8 mask;
v8u16 add_r, add_l;
v8i16 sub_r, sub_l, sum_r, sum_l, mask0, mask1;
v4i32 sum_sq0, sum_sq1, sum_sq2, sum_sq3;
v4i32 sub0, sub1, sub2, sub3;
v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
v4i32 mul0, mul1, mul2, mul3;
v4i32 total0, total1, total2, total3;
v8i16 const8 = __msa_fill_h(8);
src7 = LD_UB(src_dup + 7);
src8 = LD_UB(src_dup - 8);
for (col = 0; col < (cols >> 4); ++col)
{
ILVRL_B2_UB(src7, src8, src_r, src_l);
HSUB_UB2_SH(src_r, src_l, sub_r, sub_l);
sum_r[0] = sum + sub_r[0];
for (cnt = 0; cnt < 7; ++cnt)
{
sum_r[cnt + 1] = sum_r[cnt] + sub_r[cnt + 1];
}
sum_l[0] = sum_r[7] + sub_l[0];
for (cnt = 0; cnt < 7; ++cnt)
{
sum_l[cnt + 1] = sum_l[cnt] + sub_l[cnt + 1];
}
sum = sum_l[7];
src = LD_UB(src_dup + 16 * col);
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
src7 = (v16u8)((const8 + sum_r + (v8i16)src_r_h) >> 4);
src8 = (v16u8)((const8 + sum_l + (v8i16)src_l_h) >> 4);
tmp = (v16u8)__msa_pckev_b((v16i8)src8, (v16i8)src7);
HADD_UB2_UH(src_r, src_l, add_r, add_l);
UNPCK_SH_SW(sub_r, sub0, sub1);
UNPCK_SH_SW(sub_l, sub2, sub3);
ILVR_H2_SW(zero, add_r, zero, add_l, sum0_w, sum2_w);
ILVL_H2_SW(zero, add_r, zero, add_l, sum1_w, sum3_w);
MUL4(sum0_w, sub0, sum1_w, sub1, sum2_w, sub2, sum3_w, sub3,
mul0, mul1, mul2, mul3);
sum_sq0[0] = sum_sq + mul0[0];
for (cnt = 0; cnt < 3; ++cnt)
{
sum_sq0[cnt + 1] = sum_sq0[cnt] + mul0[cnt + 1];
}
sum_sq1[0] = sum_sq0[3] + mul1[0];
for (cnt = 0; cnt < 3; ++cnt)
{
sum_sq1[cnt + 1] = sum_sq1[cnt] + mul1[cnt + 1];
}
sum_sq2[0] = sum_sq1[3] + mul2[0];
for (cnt = 0; cnt < 3; ++cnt)
{
sum_sq2[cnt + 1] = sum_sq2[cnt] + mul2[cnt + 1];
}
sum_sq3[0] = sum_sq2[3] + mul3[0];
for (cnt = 0; cnt < 3; ++cnt)
{
sum_sq3[cnt + 1] = sum_sq3[cnt] + mul3[cnt + 1];
}
sum_sq = sum_sq3[3];
UNPCK_SH_SW(sum_r, sum0_w, sum1_w);
UNPCK_SH_SW(sum_l, sum2_w, sum3_w);
total0 = sum_sq0 * __msa_ldi_w(15);
total0 -= sum0_w * sum0_w;
total1 = sum_sq1 * __msa_ldi_w(15);
total1 -= sum1_w * sum1_w;
total2 = sum_sq2 * __msa_ldi_w(15);
total2 -= sum2_w * sum2_w;
total3 = sum_sq3 * __msa_ldi_w(15);
total3 -= sum3_w * sum3_w;
total0 = (total0 < flimit_vec);
total1 = (total1 < flimit_vec);
total2 = (total2 < flimit_vec);
total3 = (total3 < flimit_vec);
PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
tmp = __msa_bmz_v(tmp, src, (v16u8)mask);
if (col == 0)
{
uint64_t src_d;
src_d = __msa_copy_u_d((v2i64)tmp_orig, 1);
SD(src_d, (src_dup - 8));
}
src7 = LD_UB(src_dup + 16 * (col + 1) + 7);
src8 = LD_UB(src_dup + 16 * (col + 1) - 8);
ST_UB(tmp, (src_dup + (16 * col)));
}
src_dup += pitch;
}
}
}
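
/* Vertical counterpart of the across filter: a sliding 15-row window per
   column, dithered with rounding values drawn from vp8_rv_msa at a
   rand()-derived offset. */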
void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
int32_t cols, int32_t flimit)
{
int32_t row, col, cnt, i;
const int16_t *rv3 = &vp8_rv_msa[63 & rand()];
v4i32 flimit_vec;
v16u8 dst7, dst8, dst_r_b, dst_l_b;
v16i8 mask;
v8u16 add_r, add_l;
v8i16 dst_r_h, dst_l_h, sub_r, sub_l, mask0, mask1;
v4i32 sub0, sub1, sub2, sub3, total0, total1, total2, total3;
flimit_vec = __msa_fill_w(flimit);
for (col = 0; col < (cols >> 4); ++col)
{
uint8_t *dst_tmp = &dst_ptr[col << 4];
v16u8 dst;
v16i8 zero = { 0 };
v16u8 tmp[16];
v8i16 mult0, mult1, rv2_0, rv2_1;
v8i16 sum0_h = { 0 };
v8i16 sum1_h = { 0 };
v4i32 mul0 = { 0 };
v4i32 mul1 = { 0 };
v4i32 mul2 = { 0 };
v4i32 mul3 = { 0 };
v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
v4i32 add0, add1, add2, add3;
const int16_t *rv2[16];
dst = LD_UB(dst_tmp);
for (cnt = (col << 4), i = 0; i < 16; ++cnt)
{
rv2[i] = rv3 + ((cnt * 17) & 127);
++i;
}
for (cnt = -8; cnt < 0; ++cnt)
{
ST_UB(dst, dst_tmp + cnt * pitch);
}
dst = LD_UB((dst_tmp + (rows - 1) * pitch));
for (cnt = rows; cnt < rows + 17; ++cnt)
{
ST_UB(dst, dst_tmp + cnt * pitch);
}
for (cnt = -8; cnt <= 6; ++cnt)
{
dst = LD_UB(dst_tmp + (cnt * pitch));
UNPCK_UB_SH(dst, dst_r_h, dst_l_h);
MUL2(dst_r_h, dst_r_h, dst_l_h, dst_l_h, mult0, mult1);
mul0 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult0);
mul1 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult0);
mul2 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult1);
mul3 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult1);
ADD2(sum0_h, dst_r_h, sum1_h, dst_l_h, sum0_h, sum1_h);
}
for (row = 0; row < (rows + 8); ++row)
{
for (i = 0; i < 8; ++i)
{
rv2_0[i] = *(rv2[i] + (row & 127));
rv2_1[i] = *(rv2[i + 8] + (row & 127));
}
dst7 = LD_UB(dst_tmp + (7 * pitch));
dst8 = LD_UB(dst_tmp - (8 * pitch));
ILVRL_B2_UB(dst7, dst8, dst_r_b, dst_l_b);
HSUB_UB2_SH(dst_r_b, dst_l_b, sub_r, sub_l);
UNPCK_SH_SW(sub_r, sub0, sub1);
UNPCK_SH_SW(sub_l, sub2, sub3);
sum0_h += sub_r;
sum1_h += sub_l;
HADD_UB2_UH(dst_r_b, dst_l_b, add_r, add_l);
ILVRL_H2_SW(zero, add_r, add0, add1);
ILVRL_H2_SW(zero, add_l, add2, add3);
mul0 += add0 * sub0;
mul1 += add1 * sub1;
mul2 += add2 * sub2;
mul3 += add3 * sub3;
dst = LD_UB(dst_tmp);
ILVRL_B2_SH(zero, dst, dst_r_h, dst_l_h);
dst7 = (v16u8)((rv2_0 + sum0_h + dst_r_h) >> 4);
dst8 = (v16u8)((rv2_1 + sum1_h + dst_l_h) >> 4);
tmp[row & 15] = (v16u8)__msa_pckev_b((v16i8)dst8, (v16i8)dst7);
UNPCK_SH_SW(sum0_h, sum0_w, sum1_w);
UNPCK_SH_SW(sum1_h, sum2_w, sum3_w);
total0 = mul0 * __msa_ldi_w(15);
total0 -= sum0_w * sum0_w;
total1 = mul1 * __msa_ldi_w(15);
total1 -= sum1_w * sum1_w;
total2 = mul2 * __msa_ldi_w(15);
total2 -= sum2_w * sum2_w;
total3 = mul3 * __msa_ldi_w(15);
total3 -= sum3_w * sum3_w;
total0 = (total0 < flimit_vec);
total1 = (total1 < flimit_vec);
total2 = (total2 < flimit_vec);
total3 = (total3 < flimit_vec);
PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
tmp[row & 15] = __msa_bmz_v(tmp[row & 15], dst, (v16u8)mask);
if (row >= 8)
{
ST_UB(tmp[(row - 8) & 15], (dst_tmp - 8 * pitch));
}
dst_tmp += pitch;
}
}
}
void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
char blackclamp[16], char whiteclamp[16],
char bothclamp[16],
uint32_t width, uint32_t height,
int32_t pitch)
{
uint32_t i, j;
for (i = 0; i < height / 2; ++i)
{
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
for (j = width / 16; j--;)
{
v16i8 temp00_s, temp01_s;
v16u8 temp00, temp01, black_clamp, white_clamp;
v16u8 pos0, ref0, pos1, ref1;
v16i8 const127 = __msa_ldi_b(127);
pos0 = LD_UB(pos0_ptr);
ref0 = LD_UB(ref0_ptr);
pos1 = LD_UB(pos1_ptr);
ref1 = LD_UB(ref1_ptr);
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
temp00 = (pos0 < black_clamp);
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
temp01 = (pos1 < black_clamp);
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
XORI_B2_128_UB(pos0, pos1);
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp00 = (v16u8)(temp00_s < pos0);
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp01 = (temp01_s < pos1);
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
XORI_B2_128_UB(pos0, pos1);
pos0 += ref0;
ST_UB(pos0, pos0_ptr);
pos1 += ref1;
ST_UB(pos1, pos1_ptr);
pos0_ptr += 16;
pos1_ptr += 16;
ref0_ptr += 16;
ref1_ptr += 16;
}
}
}
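/*
 * Editor's sketch (not part of the original file; requires <stdlib.h>):
 * the per-row operation the vector loop above performs. Each row reads
 * from a random offset into the caller's precomputed noise array (3072
 * bytes in struct postproc_state), and the clamp tables keep pixels far
 * enough from 0/255 that adding the signed noise cannot wrap.
 */
static void add_noise_row(unsigned char *row, const signed char *noise_buf,
                          int width)
{
    const signed char *ref = noise_buf + (rand() & 0xff);
    int j;

    for (j = 0; j < width; j++)
        row[j] = (unsigned char)(row[j] + ref[j]);  /* row pre-clamped */
}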

Diff not shown because of its large size.

Diff not shown because of its large size.


@@ -1,40 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "entropy.h"
const int vp8_mode_contexts[6][4] =
{
{
/* 0 */
7, 1, 1, 143,
},
{
/* 1 */
14, 18, 14, 107,
},
{
/* 2 */
135, 64, 57, 68,
},
{
/* 3 */
60, 56, 128, 65,
},
{
/* 4 */
159, 134, 128, 34,
},
{
/* 5 */
234, 188, 128, 28,
},
};


@@ -1,25 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_MODECONT_H_
#define VP8_COMMON_MODECONT_H_
#ifdef __cplusplus
extern "C" {
#endif
extern const int vp8_mode_contexts[6][4];
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_MODECONT_H_


@@ -1,36 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_MV_H_
#define VP8_COMMON_MV_H_
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct
{
short row;
short col;
} MV;
typedef union int_mv
{
uint32_t as_int;
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_MV_H_
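/*
 * Editor's illustration (not part of the header): packing the MV into a
 * 32-bit union lets callers compare and copy motion vectors in a single
 * integer operation, which the SPLITMV paths in reconinter.c rely on.
 */
static int same_mv(int_mv a, int_mv b)
{
    return a.as_int == b.as_int;  /* one compare instead of two */
}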


@@ -1,282 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ONYX_H_
#define VP8_COMMON_ONYX_H_
#ifdef __cplusplus
extern "C"
{
#endif
#include "vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "vpx_scale/yv12config.h"
#include "ppflags.h"
struct VP8_COMP;
/* Create/destroy static data structures. */
typedef enum
{
NORMAL = 0,
FOURFIVE = 1,
THREEFIVE = 2,
ONETWO = 3
} VPX_SCALING;
typedef enum
{
USAGE_LOCAL_FILE_PLAYBACK = 0x0,
USAGE_STREAM_FROM_SERVER = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2,
USAGE_CONSTANT_QUALITY = 0x3
} END_USAGE;
typedef enum
{
MODE_REALTIME = 0x0,
MODE_GOODQUALITY = 0x1,
MODE_BESTQUALITY = 0x2,
MODE_FIRSTPASS = 0x3,
MODE_SECONDPASS = 0x4,
MODE_SECONDPASS_BEST = 0x5
} MODE;
typedef enum
{
FRAMEFLAGS_KEY = 1,
FRAMEFLAGS_GOLDEN = 2,
FRAMEFLAGS_ALTREF = 4
} FRAMETYPE_FLAGS;
#include <assert.h>
static INLINE void Scale2Ratio(int mode, int *hr, int *hs)
{
switch (mode)
{
case NORMAL:
*hr = 1;
*hs = 1;
break;
case FOURFIVE:
*hr = 4;
*hs = 5;
break;
case THREEFIVE:
*hr = 3;
*hs = 5;
break;
case ONETWO:
*hr = 1;
*hs = 2;
break;
default:
*hr = 1;
*hs = 1;
assert(0);
break;
}
}
typedef struct
{
/* 4 versions of bitstream defined:
* 0 best quality/slowest decode, 3 lowest quality/fastest decode
*/
int Version;
int Width;
int Height;
struct vpx_rational timebase;
unsigned int target_bandwidth; /* kilobits per second */
/* Parameter used for applying denoiser.
* For temporal denoiser: noise_sensitivity = 0 means off,
* noise_sensitivity = 1 means temporal denoiser on for Y channel only,
* noise_sensitivity = 2 means temporal denoiser on for all channels.
* noise_sensitivity = 3 means aggressive denoising mode.
* noise_sensitivity >= 4 means adaptive denoising mode.
* Temporal denoiser is enabled via the configuration option:
* CONFIG_TEMPORAL_DENOISING.
* For spatial denoiser: noise_sensitivity controls the amount of
* pre-processing blur: noise_sensitivity = 0 means off.
* Spatial denoiser invoked under !CONFIG_TEMPORAL_DENOISING.
*/
int noise_sensitivity;
/* parameter used for sharpening output: recommended value 0 */
int Sharpness;
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
unsigned int screen_content_mode;
/* mode ->
*(0)=Realtime/Live Encoding. This mode is optimized for realtime
* encoding (for example, capturing a television signal or feed
* from a live camera). ( speed setting controls how fast )
*(1)=Good Quality Fast Encoding. The encoder balances quality with
* the amount of time it takes to encode the output. ( speed
* setting controls how fast )
*(2)=One Pass - Best Quality. The encoder places priority on the
* quality of the output over encoding speed. The output is
* compressed at the highest possible quality. This option takes
* the longest amount of time to encode. ( speed setting ignored
* )
*(3)=Two Pass - First Pass. The encoder generates a file of
* statistics for use in the second encoding pass. ( speed
* setting controls how fast )
*(4)=Two Pass - Second Pass. The encoder uses the statistics that
* were generated in the first encoding pass to create the
* compressed output. ( speed setting controls how fast )
*(5)=Two Pass - Second Pass Best. The encoder uses the statistics
* that were generated in the first encoding pass to create the
* compressed output using the highest possible quality, and
* taking a longer amount of time to encode. ( speed setting
* ignored )
*/
int Mode;
/* Key Framing Operations */
int auto_key; /* automatically detect cut scenes */
int key_freq; /* maximum distance to key frame. */
/* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */
int allow_lag;
int lag_in_frames; /* how many frames lag before we start encoding */
/*
* DATARATE CONTROL OPTIONS
*/
int end_usage; /* vbr or cbr */
/* buffer targeting aggressiveness */
int under_shoot_pct;
int over_shoot_pct;
/* buffering parameters */
int64_t starting_buffer_level;
int64_t optimal_buffer_level;
int64_t maximum_buffer_size;
int64_t starting_buffer_level_in_ms;
int64_t optimal_buffer_level_in_ms;
int64_t maximum_buffer_size_in_ms;
/* controlling quality */
int fixed_q;
int worst_allowed_q;
int best_allowed_q;
int cq_level;
/* allow internal resizing */
int allow_spatial_resampling;
int resample_down_water_mark;
int resample_up_water_mark;
/* allow internal frame rate alterations */
int allow_df;
int drop_frames_water_mark;
/* two pass datarate control */
int two_pass_vbrbias;
int two_pass_vbrmin_section;
int two_pass_vbrmax_section;
/*
* END DATARATE CONTROL OPTIONS
*/
/* these parameters aren't to be used in the final build; don't use!!! */
int play_alternate;
int alt_freq;
int alt_q;
int key_q;
int gold_q;
int multi_threaded; /* how many threads to run the encoder on */
int token_partitions; /* how many token partitions to create */
/* early breakout threshold: for video conf recommend 800 */
int encode_breakout;
/* Bitfield defining the error resiliency features to enable.
* Can provide decodable frames after losses in previous
* frames and decodable partitions after losses in the same frame.
*/
unsigned int error_resilient_mode;
int arnr_max_frames;
int arnr_strength;
int arnr_type;
vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vp8e_tuning tuning;
/* Temporal scaling parameters */
unsigned int number_of_layers;
unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY];
unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY];
unsigned int periodicity;
unsigned int layer_id[VPX_TS_MAX_PERIODICITY];
#if CONFIG_MULTI_RES_ENCODING
/* Number of total resolutions encoded */
unsigned int mr_total_resolutions;
/* Current encoder ID */
unsigned int mr_encoder_id;
/* Down-sampling factor */
vpx_rational_t mr_down_sampling_factor;
/* Memory location to store low-resolution encoder's mode info */
void* mr_low_res_mode_info;
#endif
} VP8_CONFIG;
void vp8_initialize();
struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf);
void vp8_remove_compressor(struct VP8_COMP* *comp);
void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags);
int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags);
int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
int vp8_update_entropy(struct VP8_COMP* comp, int update);
int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols);
int vp8_set_internal_size(struct VP8_COMP* comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
int vp8_get_quantizer(struct VP8_COMP* c);
#ifdef __cplusplus
}
#endif
#endif // VP8_COMMON_ONYX_H_
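/*
 * Editor's sketch of the intended call sequence, derived from the
 * declarations above. The field values are illustrative only; real
 * callers fill in many more fields. Requires <string.h> for memset().
 */
static struct VP8_COMP *create_realtime_encoder(int width, int height)
{
    VP8_CONFIG oxcf;

    memset(&oxcf, 0, sizeof(oxcf));
    oxcf.Width = width;
    oxcf.Height = height;
    oxcf.timebase.num = 1;
    oxcf.timebase.den = 30;            /* 30 fps */
    oxcf.target_bandwidth = 800;       /* kilobits per second */
    oxcf.Mode = MODE_REALTIME;
    oxcf.end_usage = USAGE_STREAM_FROM_SERVER;

    vp8_initialize();
    return vp8_create_compressor(&oxcf);
}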


@@ -1,185 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ONYXC_INT_H_
#define VP8_COMMON_ONYXC_INT_H_
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "loopfilter.h"
#include "entropymv.h"
#include "entropy.h"
#if CONFIG_POSTPROC
#include "postproc.h"
#endif
/*#ifdef PACKET_TESTING*/
#include "header.h"
/*#endif*/
#ifdef __cplusplus
extern "C" {
#endif
#define MINQ 0
#define MAXQ 127
#define QINDEX_RANGE (MAXQ + 1)
#define NUM_YV12_BUFFERS 4
#define MAX_PARTITIONS 9
typedef struct frame_contexts
{
vp8_prob bmode_prob [VP8_BINTRAMODES-1];
vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
MV_CONTEXT mvc[2];
} FRAME_CONTEXT;
typedef enum
{
ONE_PARTITION = 0,
TWO_PARTITION = 1,
FOUR_PARTITION = 2,
EIGHT_PARTITION = 3
} TOKEN_PARTITION;
typedef enum
{
RECON_CLAMP_REQUIRED = 0,
RECON_CLAMP_NOTREQUIRED = 1
} CLAMP_TYPE;
typedef struct VP8Common
{
struct vpx_internal_error_info error;
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
int Width;
int Height;
int horiz_scale;
int vert_scale;
CLAMP_TYPE clamp_type;
YV12_BUFFER_CONFIG *frame_to_show;
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
YV12_BUFFER_CONFIG temp_scale_frame;
#if CONFIG_POSTPROC
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
int post_proc_buffer_int_used;
unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
#endif
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
int show_frame;
int frame_flags;
int MBs;
int mb_rows;
int mb_cols;
int mode_info_stride;
/* profile settings */
int mb_no_coeff_skip;
int no_lpf;
int use_bilinear_mc_filter;
int full_pixel;
int base_qindex;
int y1dc_delta_q;
int y2dc_delta_q;
int y2ac_delta_q;
int uvdc_delta_q;
int uvac_delta_q;
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
#if CONFIG_ERROR_CONCEALMENT
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
#endif
MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame
to show */
LOOPFILTERTYPE filter_type;
loop_filter_info_n lf_info;
int filter_level;
int last_sharpness_level;
int sharpness_level;
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
FRAME_CONTEXT lfc; /* last frame entropy */
FRAME_CONTEXT fc; /* this frame entropy */
unsigned int current_video_frame;
int version;
TOKEN_PARTITION multi_token_partition;
#ifdef PACKET_TESTING
VP8_HEADER oh;
#endif
#if CONFIG_POSTPROC_VISUALIZER
double bitrate;
double framerate;
#endif
#if CONFIG_MULTITHREAD
int processor_core_count;
#endif
#if CONFIG_POSTPROC
struct postproc_state postproc_state;
#endif
int cpu_caps;
} VP8_COMMON;
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_ONYXC_INT_H_


@@ -1,63 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ONYXD_H_
#define VP8_COMMON_ONYXD_H_
/* Create/destroy static data structures. */
#ifdef __cplusplus
extern "C"
{
#endif
#include "vpx_scale/yv12config.h"
#include "ppflags.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_codec.h"
#include "vpx/vp8.h"
struct VP8D_COMP;
typedef struct
{
int Width;
int Height;
int Version;
int postprocess;
int max_threads;
int error_concealment;
} VP8D_CONFIG;
typedef enum
{
VP8D_OK = 0
} VP8D_SETTING;
void vp8dx_initialize(void);
void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
int vp8dx_receive_compressed_data(struct VP8D_COMP* comp,
size_t size, const uint8_t *dest,
int64_t time_stamp);
int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
#ifdef __cplusplus
}
#endif
#endif // VP8_COMMON_ONYXD_H_
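/*
 * Editor's sketch of a decode step built from the declarations above,
 * assuming the usual 0-on-success return convention; error handling and
 * timestamp use are elided.
 */
static void decode_one_frame(struct VP8D_COMP *pbi,
                             const uint8_t *buf, size_t sz, int64_t pts)
{
    YV12_BUFFER_CONFIG sd;
    int64_t t, t_end;
    vp8_ppflags_t flags = { 0 };

    if (vp8dx_receive_compressed_data(pbi, sz, buf, pts) == 0 &&
        vp8dx_get_raw_frame(pbi, &sd, &t, &t_end, &flags) == 0)
    {
        /* sd now describes the decoded (optionally post-processed) frame */
    }
}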

Diff not shown because of its large size.


@@ -1,59 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_POSTPROC_H_
#define VP8_COMMON_POSTPROC_H_
#include "vpx_ports/mem.h"
struct postproc_state
{
int last_q;
int last_noise;
char noise[3072];
int last_base_qindex;
int last_frame_valid;
DECLARE_ALIGNED(16, char, blackclamp[16]);
DECLARE_ALIGNED(16, char, whiteclamp[16]);
DECLARE_ALIGNED(16, char, bothclamp[16]);
};
#include "onyxc_int.h"
#include "ppflags.h"
#ifdef __cplusplus
extern "C" {
#endif
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
vp8_ppflags_t *flags);
void vp8_de_noise(struct VP8Common *oci,
YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
int flag,
int uvfilter);
void vp8_deblock(struct VP8Common *oci,
YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
int flag);
#define MFQE_PRECISION 4
void vp8_multiframe_quality_enhance(struct VP8Common *cm);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_POSTPROC_H_


@@ -1,49 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_PPFLAGS_H_
#define VP8_COMMON_PPFLAGS_H_
#ifdef __cplusplus
extern "C" {
#endif
enum
{
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1<<0,
VP8D_DEMACROBLOCK = 1<<1,
VP8D_ADDNOISE = 1<<2,
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
VP8D_DEBUG_DRAW_MV = 1<<7,
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
VP8D_MFQE = 1<<10
};
typedef struct
{
int post_proc_flag;
int deblocking_level;
int noise_level;
int display_ref_frame_flag;
int display_mb_modes_flag;
int display_b_modes_flag;
int display_mv_flag;
} vp8_ppflags_t;
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_PPFLAGS_H_
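/*
 * Editor's example (not part of the header): post-processing requests
 * combine the bit flags above; the level value is illustrative.
 */
static vp8_ppflags_t default_ppflags(void)
{
    vp8_ppflags_t pp = { 0 };

    pp.post_proc_flag = VP8D_DEBLOCK | VP8D_DEMACROBLOCK;
    pp.deblocking_level = 5;
    return pp;
}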


@@ -1,135 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "quant_common.h"
static const int dc_qlookup[QINDEX_RANGE] =
{
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
static const int ac_qlookup[QINDEX_RANGE] =
{
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
int vp8_dc_quant(int QIndex, int Delta)
{
int retval;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ];
return retval;
}
int vp8_dc2quant(int QIndex, int Delta)
{
int retval;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ] * 2;
return retval;
}
int vp8_dc_uv_quant(int QIndex, int Delta)
{
int retval;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ];
if (retval > 132)
retval = 132;
return retval;
}
int vp8_ac_yquant(int QIndex)
{
int retval;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = ac_qlookup[ QIndex ];
return retval;
}
int vp8_ac2quant(int QIndex, int Delta)
{
int retval;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
* The smallest precision for that is '(x*6349) >> 12' but 16 is a good
* word size. */
retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
if (retval < 8)
retval = 8;
return retval;
}
int vp8_ac_uv_quant(int QIndex, int Delta)
{
int retval;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = ac_qlookup[ QIndex ];
return retval;
}
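/*
 * Editor's check (not part of the original file): the fixed-point
 * identity that the comment in vp8_ac2quant() relies on, exhaustively
 * verifiable over the table's value range. Requires <assert.h>.
 */
static void check_ac2quant_scaling(void)
{
    int x;

    for (x = 0; x <= 284; x++)
        assert(x * 155 / 100 == (x * 101581) >> 16);
}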


@@ -1,34 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_QUANT_COMMON_H_
#define VP8_COMMON_QUANT_COMMON_H_
#include "string.h"
#include "blockd.h"
#include "onyxc_int.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int vp8_ac_yquant(int QIndex);
extern int vp8_dc_quant(int QIndex, int Delta);
extern int vp8_dc2quant(int QIndex, int Delta);
extern int vp8_ac2quant(int QIndex, int Delta);
extern int vp8_dc_uv_quant(int QIndex, int Delta);
extern int vp8_ac_uv_quant(int QIndex, int Delta);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_QUANT_COMMON_H_


@@ -1,544 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include <string.h>
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "blockd.h"
#include "reconinter.h"
#if CONFIG_RUNTIME_CPU_DETECT
#include "onyxc_int.h"
#endif
void vp8_copy_mem16x16_c(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride)
{
int r;
for (r = 0; r < 16; r++)
{
memcpy(dst, src, 16);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem8x8_c(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride)
{
int r;
for (r = 0; r < 8; r++)
{
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem8x4_c(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride)
{
int r;
for (r = 0; r < 4; r++)
{
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
}
}
void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
{
int r;
unsigned char *pred_ptr = d->predictor;
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
}
else
{
for (r = 0; r < 4; r++)
{
pred_ptr[0] = ptr[0];
pred_ptr[1] = ptr[1];
pred_ptr[2] = ptr[2];
pred_ptr[3] = ptr[3];
pred_ptr += pitch;
ptr += pre_stride;
}
}
}
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
{
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
}
else
{
vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
}
}
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
{
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
}
else
{
vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
}
}
static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
{
int r;
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
}
else
{
for (r = 0; r < 4; r++)
{
dst[0] = ptr[0];
dst[1] = ptr[1];
dst[2] = ptr[2];
dst[3] = ptr[3];
dst += dst_stride;
ptr += pre_stride;
}
}
}
/*encoder only*/
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
{
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int offset;
int pre_stride = x->pre.uv_stride;
/* calc uv motion vectors */
mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
mv_row /= 2;
mv_col /= 2;
mv_row &= x->fullpixel_mask;
mv_col &= x->fullpixel_mask;
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
}
else
{
vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
}
}
/*encoder only*/
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
{
int i, j;
int pre_stride = x->pre.uv_stride;
unsigned char *base_pre;
/* build uv mvs */
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
int yoffset = i * 8 + j * 2;
int uoffset = 16 + i * 2 + j;
int voffset = 20 + i * 2 + j;
int temp;
temp = x->block[yoffset ].bmi.mv.as_mv.row
+ x->block[yoffset+1].bmi.mv.as_mv.row
+ x->block[yoffset+4].bmi.mv.as_mv.row
+ x->block[yoffset+5].bmi.mv.as_mv.row;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
temp = x->block[yoffset ].bmi.mv.as_mv.col
+ x->block[yoffset+1].bmi.mv.as_mv.col
+ x->block[yoffset+4].bmi.mv.as_mv.col
+ x->block[yoffset+5].bmi.mv.as_mv.col;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
}
}
base_pre = x->pre.u_buffer;
for (i = 16; i < 20; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
else
{
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
}
}
base_pre = x->pre.v_buffer;
for (i = 20; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
else
{
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
}
}
}
/*encoder only*/
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
unsigned char *dst_y,
int dst_ystride)
{
unsigned char *ptr_base;
unsigned char *ptr;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->pre.y_stride;
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
dst_y, dst_ystride);
}
else
{
vp8_copy_mem16x16(ptr, pre_stride, dst_y,
dst_ystride);
}
}
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
{
/* If the MV points so far into the UMV border that no visible pixels
* are used for reconstruction, the subpel part of the MV can be
* discarded and the MV limited to 16 pixels with equivalent results.
*
* This limit kicks in at 19 pixels for the top and left edges, for
* the 16 pixels plus 3 taps right of the central pixel when subpel
* filtering. The bottom and right edges use 16 pixels plus 2 pixels
* left of the central pixel when filtering.
*/
if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
mv->col = xd->mb_to_left_edge - (16 << 3);
else if (mv->col > xd->mb_to_right_edge + (18 << 3))
mv->col = xd->mb_to_right_edge + (16 << 3);
if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
mv->row = xd->mb_to_top_edge - (16 << 3);
else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
mv->row = xd->mb_to_bottom_edge + (16 << 3);
}
/* A version of the above function for chroma block MVs.*/
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
{
mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
(xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ?
(xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
(xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
(xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
}
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *dst_y,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_ystride,
int dst_uvstride)
{
int offset;
unsigned char *ptr;
unsigned char *uptr, *vptr;
int_mv _16x16mv;
unsigned char *ptr_base = x->pre.y_buffer;
int pre_stride = x->pre.y_stride;
_16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
{
clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
}
ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
if ( _16x16mv.as_int & 0x00070007)
{
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
}
else
{
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
}
/* calc uv motion vectors */
_16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
_16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
_16x16mv.as_mv.row /= 2;
_16x16mv.as_mv.col /= 2;
_16x16mv.as_mv.row &= x->fullpixel_mask;
_16x16mv.as_mv.col &= x->fullpixel_mask;
pre_stride >>= 1;
offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
if ( _16x16mv.as_int & 0x00070007)
{
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
}
else
{
vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
}
}
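/*
 * Editor's note (not in the original file): the 0x00070007 test above
 * exploits the int_mv packing of row and col as two 16-bit halves, so a
 * single AND inspects the low three (sub-pel) bits of both components;
 * it is equivalent to ((mv.row & 7) || (mv.col & 7)).
 */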
static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
{
int i;
unsigned char *base_dst = x->dst.y_buffer;
unsigned char *base_pre = x->pre.y_buffer;
if (x->mode_info_context->mbmi.partitioning < 3)
{
BLOCKD *b;
int dst_stride = x->dst.y_stride;
x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
x->block[10].bmi = x->mode_info_context->bmi[10];
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
{
clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
}
b = &x->block[ 0];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
b = &x->block[ 2];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
b = &x->block[ 8];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
b = &x->block[10];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
}
else
{
for (i = 0; i < 16; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
int dst_stride = x->dst.y_stride;
x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
{
clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x);
}
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
else
{
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
}
}
}
base_dst = x->dst.u_buffer;
base_pre = x->pre.u_buffer;
for (i = 16; i < 20; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
int dst_stride = x->dst.uv_stride;
/* Note: uv mvs already clamped in build_4x4uvmvs() */
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
else
{
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
}
}
base_dst = x->dst.v_buffer;
base_pre = x->pre.v_buffer;
for (i = 20; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
int dst_stride = x->dst.uv_stride;
/* Note: uv mvs already clamped in build_4x4uvmvs() */
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
else
{
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
}
}
}
static
void build_4x4uvmvs(MACROBLOCKD *x)
{
int i, j;
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
int yoffset = i * 8 + j * 2;
int uoffset = 16 + i * 2 + j;
int voffset = 20 + i * 2 + j;
int temp;
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
}
}
}
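/*
 * Editor's worked example (not in the original file) of the rounding
 * idiom above: the sign shift yields -1 for negative sums, so the
 * adjustment "4 + (sign * 8)" adds +4 or -4 before the divide by 8,
 * giving round-to-nearest under C's truncating division. E.g.
 * temp = 13:  (13 + 4) / 8  =  2 = round(13 / 8.0);
 * temp = -13: (-13 - 4) / 8 = -2 = round(-13 / 8.0).
 */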
void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
{
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.y_stride, xd->dst.uv_stride);
}
else
{
build_4x4uvmvs(xd);
build_inter4x4_predictors_mb(xd);
}
}


@@ -1,43 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTER_H_
#define VP8_COMMON_RECONINTER_H_
#ifdef __cplusplus
extern "C" {
#endif
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *dst_y,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_ystride,
int dst_uvstride);
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
unsigned char *dst_y,
int dst_ystride);
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
unsigned char *base_pre,
int pre_stride,
vp8_subpix_fn_t sppf);
extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_RECONINTER_H_


@@ -1,117 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/vpx_once.h"
#include "blockd.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
enum {
SIZE_16,
SIZE_8,
NUM_SIZES,
};
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
static intra_pred_fn pred[4][NUM_SIZES];
static intra_pred_fn dc_pred[2][2][NUM_SIZES];
static void vp8_init_intra_predictors_internal(void)
{
#define INIT_SIZE(sz) \
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
\
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
INIT_SIZE(16);
INIT_SIZE(8);
vp8_init_intra4x4_predictors_internal();
}
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
unsigned char * ypred_ptr,
int y_stride)
{
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
int i;
intra_pred_fn fn;
for (i = 0; i < 16; i++)
{
yleft_col[i] = yleft[i* left_stride];
}
if (mode == DC_PRED)
{
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
}
else
{
fn = pred[mode][SIZE_16];
}
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
}
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride)
{
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
unsigned char uleft_col[8];
unsigned char vleft_col[8];
int i;
intra_pred_fn fn;
for (i = 0; i < 8; i++)
{
uleft_col[i] = uleft[i * left_stride];
vleft_col[i] = vleft[i * left_stride];
}
if (uvmode == DC_PRED)
{
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
}
else
{
fn = pred[uvmode][SIZE_8];
}
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
}
void vp8_init_intra_predictors(void)
{
once(vp8_init_intra_predictors_internal);
}


@@ -1,44 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTRA_H_
#define VP8_COMMON_RECONINTRA_H_
#include "vp8/common/blockd.h"
#ifdef __cplusplus
extern "C" {
#endif
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char *yabove_row,
unsigned char *yleft,
int left_stride,
unsigned char *ypred_ptr,
int y_stride);
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride);
void vp8_init_intra_predictors(void);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_RECONINTRA_H_


@@ -1,54 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "blockd.h"
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
static intra_pred_fn pred[10];
void vp8_init_intra4x4_predictors_internal(void)
{
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
pred[B_HE_PRED] = vpx_he_predictor_4x4;
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
}
void vp8_intra4x4_predict(unsigned char *above,
unsigned char *yleft, int left_stride,
B_PREDICTION_MODE b_mode,
unsigned char *dst, int dst_stride,
unsigned char top_left)
{
unsigned char Left[4];
unsigned char Aboveb[12], *Above = Aboveb + 4;
Left[0] = yleft[0];
Left[1] = yleft[left_stride];
Left[2] = yleft[2 * left_stride];
Left[3] = yleft[3 * left_stride];
memcpy(Above, above, 8);
Above[-1] = top_left;
pred[b_mode](dst, dst_stride, Above, Left);
}


@@ -1,48 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTRA4X4_H_
#define VP8_COMMON_RECONINTRA4X4_H_
#include "vp8/common/blockd.h"
#ifdef __cplusplus
extern "C" {
#endif
static void intra_prediction_down_copy(MACROBLOCKD *xd,
unsigned char *above_right_src)
{
int dst_stride = xd->dst.y_stride;
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
unsigned int *src_ptr = (unsigned int *)above_right_src;
unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride);
unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride);
unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride);
*dst_ptr0 = *src_ptr;
*dst_ptr1 = *src_ptr;
*dst_ptr2 = *src_ptr;
}
void vp8_intra4x4_predict(unsigned char *Above,
unsigned char *yleft, int left_stride,
B_PREDICTION_MODE b_mode,
unsigned char *dst, int dst_stride,
unsigned char top_left);
void vp8_init_intra4x4_predictors_internal(void);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_RECONINTRA4X4_H_


@@ -1,19 +0,0 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vpx_config.h"
#define RTCD_C
#include "./vp8_rtcd.h"
#include "vpx_ports/vpx_once.h"
void vp8_rtcd()
{
once(setup_rtcd_internal);
}


@@ -1,321 +0,0 @@
sub vp8_common_forward_decls() {
print <<EOF
/*
* VP8
*/
struct blockd;
struct macroblockd;
struct loop_filter_info;
/* Encoder forward decls */
struct block;
struct macroblock;
struct variance_vtable;
union int_mv;
struct yv12_buffer_config;
EOF
}
forward_decls qw/vp8_common_forward_decls/;
#
# system state
#
add_proto qw/void vp8_clear_system_state/, "";
specialize qw/vp8_clear_system_state mmx/;
$vp8_clear_system_state_mmx=vpx_reset_mmx_state;
#
# Dequant
#
add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
specialize qw/vp8_dequantize_b mmx media neon msa/;
$vp8_dequantize_b_media=vp8_dequantize_b_v6;
add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
specialize qw/vp8_dequant_idct_add mmx media neon dspr2 msa/;
$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2 msa/;
$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2 msa/;
$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
$vp8_dequant_idct_add_uv_block_dspr2=vp8_dequant_idct_add_uv_block_dspr2;
#
# Loopfilter
#
add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2 msa/;
$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon msa/;
$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
$vp8_loop_filter_simple_mbv_msa=vp8_loop_filter_simple_vertical_edge_msa;
add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon msa/;
$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
$vp8_loop_filter_simple_mbh_msa=vp8_loop_filter_simple_horizontal_edge_msa;
add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon msa/;
$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
$vp8_loop_filter_simple_bv_msa=vp8_loop_filter_bvs_msa;
add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon msa/;
$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
$vp8_loop_filter_simple_bh_msa=vp8_loop_filter_bhs_msa;
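# Editor's sketch (not part of this file): roughly what the rtcd
# generator emits into vp8_rtcd.h for a multi-variant entry such as
# vp8_dequantize_b -- one prototype per variant plus a pointer assigned
# by setup_rtcd_internal(); the exact output depends on the configured
# targets.
#
#   void vp8_dequantize_b_c(struct blockd *, short *dqc);
#   void vp8_dequantize_b_mmx(struct blockd *, short *dqc);
#   RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd *, short *dqc);
#
#   /* in setup_rtcd_internal():
#      vp8_dequantize_b = vp8_dequantize_b_c;
#      if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; */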
#
# IDCT
#
#idct16
add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
specialize qw/vp8_short_idct4x4llm mmx media neon dspr2 msa/;
$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual;
$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
#iwalsh1
add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
# no asm yet
#iwalsh16
add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2 msa/;
$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6;
$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
#idct1_scalar_add
add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
specialize qw/vp8_dc_only_idct_add mmx media neon dspr2 msa/;
$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6;
$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
#
# RECON
#
add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2 msa/;
$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6;
$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x8 mmx media neon dspr2 msa/;
$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6;
$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/;
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
#
# Postproc
#
if (vpx_config("CONFIG_POSTPROC") eq "yes") {
add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
specialize qw/vp8_mbpost_proc_down mmx sse2 msa/;
$vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm;
add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
specialize qw/vp8_mbpost_proc_across_ip sse2 msa/;
$vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm;
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
specialize qw/vp8_plane_add_noise mmx sse2 msa/;
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
# no asm yet
add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
# no asm yet
add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
# no asm yet
add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
specialize qw/vp8_filter_by_weight16x16 sse2 msa/;
add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
specialize qw/vp8_filter_by_weight8x8 sse2 msa/;
add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
# no asm yet
}
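#
# The block above is emitted into the generated rtcd header only when
# CONFIG_POSTPROC is enabled; builds configured without postprocessing
# get neither prototypes nor dispatch entries for these functions.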
#
# Subpixel
#
add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2 msa/;
$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6;
$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2 msa/;
$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6;
$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2 msa/;
$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2 msa/;
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon msa/;
$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6;
add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon msa/;
$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6;
add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict8x4 mmx media neon msa/;
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
specialize qw/vp8_bilinear_predict4x4 mmx media msa/;
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
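#
# The six-tap filters above implement VP8's normative luma
# interpolation; the bilinear variants serve the cheaper filtering
# modes. The 4x4 NEON versions remain disabled pending the WebM issues
# referenced in the TODOs.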
#
# Encoder functions below this point.
#
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
#
# Block copy
#
if ($opts{arch} =~ /x86/) {
add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
specialize qw/vp8_copy32xn sse2 sse3/;
}
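#
# vp8_copy32xn is registered for x86 only; it is a helper used by the
# SSE full-search SAD code below rather than a general copy primitive.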
#
# Forward DCT
#
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct4x4 mmx sse2 media neon msa/;
$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_fdct8x4 mmx sse2 media neon msa/;
$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
specialize qw/vp8_short_walsh4x4 sse2 media neon msa/;
$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
#
# Quantizer
#
add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;
add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;
#
# Block subtraction
#
add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
specialize qw/vp8_block_error mmx sse2 msa/;
$vp8_block_error_sse2=vp8_block_error_xmm;
add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
specialize qw/vp8_mbblock_error mmx sse2 msa/;
$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm;
add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
specialize qw/vp8_mbuverror mmx sse2 msa/;
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
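# The _xmm overrides above reflect the historical symbol names exported
# by the SSE2 assembly, which predate the generated _sse2 convention.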
#
# Motion search
#
add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_full_search_sad sse3 sse4_1/;
$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;
add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_refining_search_sad sse3/;
$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;
add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
specialize qw/vp8_diamond_search_sad sse3/;
$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4;
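#
# The x3/x4/x8 suffixes on the search kernels indicate how many SAD
# candidates each call evaluates at once; vp8_full_search_sadx8 needs
# SSE4.1, while the x3/x4 variants require only SSE3.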
#
# Alt-ref Noise Reduction (ARNR)
#
if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
specialize qw/vp8_temporal_filter_apply sse2 msa/;
}
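#
# ARNR temporal filtering requires lookahead frames, so it is excluded
# from CONFIG_REALTIME_ONLY builds along with the rest of the alt-ref
# pipeline.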
#
# Denoiser filter
#
if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
specialize qw/vp8_denoiser_filter sse2 neon msa/;
add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
specialize qw/vp8_denoiser_filter_uv sse2 neon msa/;
}
# End of encoder only functions
}
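#
# For orientation, a hypothetical sketch of what the rtcd generator
# derives from one entry above -- illustrative only, not the literal
# rtcd.pl output:
#
#   /* rtcd.h (generated): one prototype per registered variant plus a
#      dispatch pointer. */
#   void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
#   void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch);
#   extern void (*vp8_short_fdct4x4)(short *input, short *output,
#                                    int pitch);
#
#   /* setup_rtcd_internal() (generated): pick the best implementation
#      the running CPU supports. */
#   vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
#   if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2;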
1;
