drm/amd/display: move FPU associated DSC code to DML folder

[Why & How]
As part of the FPU isolation work documented in
https://patchwork.freedesktop.org/series/93042/, isolate code that uses
FPU in DSC to DML, where all FPU code should locate.

This change does not refactor any functions but move code around.

Cc: Christian König <christian.koenig@amd.com>
Cc: Hersen Wu <hersenxs.wu@amd.com>
Cc: Anson Jacob <Anson.Jacob@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Acked-by: Agustin Gutierrez <agustin.gutierrez@amd.com>
Tested-by: Anson Jacob <Anson.Jacob@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Qingqing Zhuo 2021-08-31 07:52:24 -04:00 коммит произвёл Alex Deucher
Родитель ffd89aa968
Коммит d738db6883
8 изменённых файлов: 389 добавлений и 338 удалений

Просмотреть файл

@ -70,6 +70,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
@ -84,6 +85,7 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcfla
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
endif
CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
@ -99,6 +101,7 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
DML += dsc/rc_calc_fpu.o
endif
AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML))

Просмотреть файл

@ -0,0 +1,291 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include "rc_calc_fpu.h"
#include "qp_tables.h"
#include "amdgpu_dm/dc_fpu.h"
#define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min)
#define MODE_SELECT(val444, val422, val420) \
(cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420))
#define TABLE_CASE(mode, bpc, max) case (table_hash(mode, BPC_##bpc, max)): \
table = qp_table_##mode##_##bpc##bpc_##max; \
table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
break
static int median3(int a, int b, int c)
{
if (a > b)
swap(a, b);
if (b > c)
swap(b, c);
if (a > b)
swap(b, c);
return b;
}
static double dsc_roundf(double num)
{
if (num < 0.0)
num = num - 0.5;
else
num = num + 0.5;
return (int)(num);
}
static double dsc_ceil(double num)
{
double retval = (int)num;
if (retval != num && num > 0)
retval = num + 1;
return (int)retval;
}
static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
enum max_min max_min, float bpp)
{
int mode = MODE_SELECT(444, 422, 420);
int sel = table_hash(mode, bpc, max_min);
int table_size = 0;
int index;
const struct qp_entry *table = 0L;
// alias enum
enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
switch (sel) {
TABLE_CASE(444, 8, max);
TABLE_CASE(444, 8, min);
TABLE_CASE(444, 10, max);
TABLE_CASE(444, 10, min);
TABLE_CASE(444, 12, max);
TABLE_CASE(444, 12, min);
TABLE_CASE(422, 8, max);
TABLE_CASE(422, 8, min);
TABLE_CASE(422, 10, max);
TABLE_CASE(422, 10, min);
TABLE_CASE(422, 12, max);
TABLE_CASE(422, 12, min);
TABLE_CASE(420, 8, max);
TABLE_CASE(420, 8, min);
TABLE_CASE(420, 10, max);
TABLE_CASE(420, 10, min);
TABLE_CASE(420, 12, max);
TABLE_CASE(420, 12, min);
}
if (table == 0)
return;
index = (bpp - table[0].bpp) * 2;
/* requested size is bigger than the table */
if (index >= table_size) {
dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
return;
}
memcpy(qps, table[index].qps, sizeof(qp_set));
}
static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
{
int *p = ofs;
if (mode == CM_444 || mode == CM_RGB) {
*p++ = (bpp <= 6) ? (0) : ((((bpp >= 8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
*p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
*p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
*p++ = (bpp <= 6) ? (-4) : ((((bpp >= 8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
*p++ = (bpp <= 6) ? (-6) : ((((bpp >= 8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
*p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
*p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
*p++ = -10;
*p++ = (bpp <= 6) ? (-12) : ((bpp >= 8) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2 / 2.0))));
*p++ = -12;
*p++ = -12;
*p++ = -12;
} else if (mode == CM_422) {
*p++ = (bpp <= 8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp - 8) * (8 / 2.0))));
*p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp - 8) * (8 / 2.0))));
*p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp - 8) * (4 / 2.0))));
*p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp - 8) * (2 / 2.0))));
*p++ = (bpp <= 8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp - 8) * (2 / 2.0))));
*p++ = -10;
*p++ = (bpp <= 6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2.0 / 1))));
*p++ = -12;
*p++ = -12;
*p++ = -12;
} else {
*p++ = (bpp <= 6) ? (2) : ((bpp >= 8) ? (10) : (2 + dsc_roundf((bpp - 6) * (8 / 2.0))));
*p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (8) : (0 + dsc_roundf((bpp - 6) * (8 / 2.0))));
*p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (6) : (0 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-2) : ((bpp >= 8) ? (4) : (-2 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-4) : ((bpp >= 8) ? (2) : (-4 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-6) : ((bpp >= 8) ? (0) : (-6 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-2) : (-8 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-4) : (-8 + dsc_roundf((bpp - 6) * (4 / 2.0))));
*p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-6) : (-8 + dsc_roundf((bpp - 6) * (2 / 2.0))));
*p++ = (bpp <= 6) ? (-10) : ((bpp >= 8) ? (-8) : (-10 + dsc_roundf((bpp - 6) * (2 / 2.0))));
*p++ = -10;
*p++ = (bpp <= 4) ? (-12) : ((bpp >= 5) ? (-10) : (-12 + dsc_roundf((bpp - 4) * (2 / 1.0))));
*p++ = -12;
*p++ = -12;
*p++ = -12;
}
}
void _do_calc_rc_params(struct rc_params *rc,
enum colour_mode cm,
enum bits_per_comp bpc,
u16 drm_bpp,
bool is_navite_422_or_420,
int slice_width,
int slice_height,
int minor_version)
{
float bpp;
float bpp_group;
float initial_xmit_delay_factor;
int padding_pixels;
int i;
dc_assert_fp_enabled();
bpp = ((float)drm_bpp / 16.0);
/* in native_422 or native_420 modes, the bits_per_pixel is double the
* target bpp (the latter is what calc_rc_params expects)
*/
if (is_navite_422_or_420)
bpp /= 2.0;
rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
switch (cm) {
case CM_420:
rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp - 5) * (3584)))));
rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
rc->second_line_bpg_offset = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
break;
case CM_422:
rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
rc->second_line_bpg_offset = 0;
break;
case CM_444:
case CM_RGB:
rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
rc->second_line_bpg_offset = 0;
break;
}
initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
if (cm == CM_422 || cm == CM_420)
slice_width /= 2;
padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
rc->initial_xmit_delay++;
}
rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
rc->flatness_det_thresh = 2 << (bpc - 8);
get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp);
get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp);
if (cm == CM_444 && minor_version == 1) {
for (i = 0; i < QP_SET_SIZE; ++i) {
rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
}
}
get_ofs_set(rc->ofs, cm, bpp);
/* fixed parameters */
rc->rc_model_size = 8192;
rc->rc_edge_factor = 6;
rc->rc_tgt_offset_hi = 3;
rc->rc_tgt_offset_lo = 3;
rc->rc_buf_thresh[0] = 896;
rc->rc_buf_thresh[1] = 1792;
rc->rc_buf_thresh[2] = 2688;
rc->rc_buf_thresh[3] = 3584;
rc->rc_buf_thresh[4] = 4480;
rc->rc_buf_thresh[5] = 5376;
rc->rc_buf_thresh[6] = 6272;
rc->rc_buf_thresh[7] = 6720;
rc->rc_buf_thresh[8] = 7168;
rc->rc_buf_thresh[9] = 7616;
rc->rc_buf_thresh[10] = 7744;
rc->rc_buf_thresh[11] = 7872;
rc->rc_buf_thresh[12] = 8000;
rc->rc_buf_thresh[13] = 8064;
}
u32 _do_bytes_per_pixel_calc(int slice_width,
u16 drm_bpp,
bool is_navite_422_or_420)
{
float bpp;
u32 bytes_per_pixel;
double d_bytes_per_pixel;
dc_assert_fp_enabled();
bpp = ((float)drm_bpp / 16.0);
d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
// TODO: Make sure the formula for calculating this is precise (ceiling
// vs. floor, and at what point they should be applied)
if (is_navite_422_or_420)
d_bytes_per_pixel /= 2;
bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
return bytes_per_pixel;
}

Просмотреть файл

@ -0,0 +1,94 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef __RC_CALC_FPU_H__
#define __RC_CALC_FPU_H__
#include "os_types.h"
#include <drm/drm_dsc.h>
#define QP_SET_SIZE 15
typedef int qp_set[QP_SET_SIZE];
struct rc_params {
int rc_quant_incr_limit0;
int rc_quant_incr_limit1;
int initial_fullness_offset;
int initial_xmit_delay;
int first_line_bpg_offset;
int second_line_bpg_offset;
int flatness_min_qp;
int flatness_max_qp;
int flatness_det_thresh;
qp_set qp_min;
qp_set qp_max;
qp_set ofs;
int rc_model_size;
int rc_edge_factor;
int rc_tgt_offset_hi;
int rc_tgt_offset_lo;
int rc_buf_thresh[QP_SET_SIZE - 1];
};
enum colour_mode {
CM_RGB, /* 444 RGB */
CM_444, /* 444 YUV or simple 422 */
CM_422, /* native 422 */
CM_420 /* native 420 */
};
enum bits_per_comp {
BPC_8 = 8,
BPC_10 = 10,
BPC_12 = 12
};
enum max_min {
DAL_MM_MIN = 0,
DAL_MM_MAX = 1
};
struct qp_entry {
float bpp;
const qp_set qps;
};
typedef struct qp_entry qp_table[];
u32 _do_bytes_per_pixel_calc(int slice_width,
u16 drm_bpp,
bool is_navite_422_or_420);
void _do_calc_rc_params(struct rc_params *rc,
enum colour_mode cm,
enum bits_per_comp bpc,
u16 drm_bpp,
bool is_navite_422_or_420,
int slice_width,
int slice_height,
int minor_version);
#endif

Просмотреть файл

@ -1,35 +1,6 @@
# SPDX-License-Identifier: MIT
#
# Makefile for the 'dsc' sub-component of DAL.
ifdef CONFIG_X86
dsc_ccflags := -mhard-float -msse
endif
ifdef CONFIG_PPC64
dsc_ccflags := -mhard-float -maltivec
endif
ifdef CONFIG_CC_IS_GCC
ifeq ($(call cc-ifversion, -lt, 0701, y), y)
IS_OLD_GCC = 1
endif
endif
ifdef CONFIG_X86
ifdef IS_OLD_GCC
# Stack alignment mismatch, proceed with caution.
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
# (8B stack alignment).
dsc_ccflags += -mpreferred-stack-boundary=4
else
dsc_ccflags += -msse2
endif
endif
CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_rcflags)
DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC))

Просмотреть файл

@ -23,266 +23,7 @@
* Authors: AMD
*
*/
#include <drm/drm_dsc.h>
#include "os_types.h"
#include "rc_calc.h"
#include "qp_tables.h"
#define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min)
#define MODE_SELECT(val444, val422, val420) \
(cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420))
#define TABLE_CASE(mode, bpc, max) case (table_hash(mode, BPC_##bpc, max)): \
table = qp_table_##mode##_##bpc##bpc_##max; \
table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
break
static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
enum max_min max_min, float bpp)
{
int mode = MODE_SELECT(444, 422, 420);
int sel = table_hash(mode, bpc, max_min);
int table_size = 0;
int index;
const struct qp_entry *table = 0L;
// alias enum
enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
switch (sel) {
TABLE_CASE(444, 8, max);
TABLE_CASE(444, 8, min);
TABLE_CASE(444, 10, max);
TABLE_CASE(444, 10, min);
TABLE_CASE(444, 12, max);
TABLE_CASE(444, 12, min);
TABLE_CASE(422, 8, max);
TABLE_CASE(422, 8, min);
TABLE_CASE(422, 10, max);
TABLE_CASE(422, 10, min);
TABLE_CASE(422, 12, max);
TABLE_CASE(422, 12, min);
TABLE_CASE(420, 8, max);
TABLE_CASE(420, 8, min);
TABLE_CASE(420, 10, max);
TABLE_CASE(420, 10, min);
TABLE_CASE(420, 12, max);
TABLE_CASE(420, 12, min);
}
if (table == 0)
return;
index = (bpp - table[0].bpp) * 2;
/* requested size is bigger than the table */
if (index >= table_size) {
dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
return;
}
memcpy(qps, table[index].qps, sizeof(qp_set));
}
static double dsc_roundf(double num)
{
if (num < 0.0)
num = num - 0.5;
else
num = num + 0.5;
return (int)(num);
}
static double dsc_ceil(double num)
{
double retval = (int)num;
if (retval != num && num > 0)
retval = num + 1;
return (int)retval;
}
static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
{
int *p = ofs;
if (mode == CM_444 || mode == CM_RGB) {
*p++ = (bpp <= 6) ? (0) : ((((bpp >= 8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
*p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
*p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
*p++ = (bpp <= 6) ? (-4) : ((((bpp >= 8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
*p++ = (bpp <= 6) ? (-6) : ((((bpp >= 8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
*p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
*p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
*p++ = -10;
*p++ = (bpp <= 6) ? (-12) : ((bpp >= 8) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2 / 2.0))));
*p++ = -12;
*p++ = -12;
*p++ = -12;
} else if (mode == CM_422) {
*p++ = (bpp <= 8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp - 8) * (8 / 2.0))));
*p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp - 8) * (8 / 2.0))));
*p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp - 8) * (6 / 2.0))));
*p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp - 8) * (4 / 2.0))));
*p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp - 8) * (2 / 2.0))));
*p++ = (bpp <= 8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp - 8) * (2 / 2.0))));
*p++ = -10;
*p++ = (bpp <= 6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2.0 / 1))));
*p++ = -12;
*p++ = -12;
*p++ = -12;
} else {
*p++ = (bpp <= 6) ? (2) : ((bpp >= 8) ? (10) : (2 + dsc_roundf((bpp - 6) * (8 / 2.0))));
*p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (8) : (0 + dsc_roundf((bpp - 6) * (8 / 2.0))));
*p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (6) : (0 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-2) : ((bpp >= 8) ? (4) : (-2 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-4) : ((bpp >= 8) ? (2) : (-4 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-6) : ((bpp >= 8) ? (0) : (-6 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-2) : (-8 + dsc_roundf((bpp - 6) * (6 / 2.0))));
*p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-4) : (-8 + dsc_roundf((bpp - 6) * (4 / 2.0))));
*p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-6) : (-8 + dsc_roundf((bpp - 6) * (2 / 2.0))));
*p++ = (bpp <= 6) ? (-10) : ((bpp >= 8) ? (-8) : (-10 + dsc_roundf((bpp - 6) * (2 / 2.0))));
*p++ = -10;
*p++ = (bpp <= 4) ? (-12) : ((bpp >= 5) ? (-10) : (-12 + dsc_roundf((bpp - 4) * (2 / 1.0))));
*p++ = -12;
*p++ = -12;
*p++ = -12;
}
}
static int median3(int a, int b, int c)
{
if (a > b)
swap(a, b);
if (b > c)
swap(b, c);
if (a > b)
swap(b, c);
return b;
}
static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
enum bits_per_comp bpc, u16 drm_bpp,
bool is_navite_422_or_420,
int slice_width, int slice_height,
int minor_version)
{
float bpp;
float bpp_group;
float initial_xmit_delay_factor;
int padding_pixels;
int i;
bpp = ((float)drm_bpp / 16.0);
/* in native_422 or native_420 modes, the bits_per_pixel is double the
* target bpp (the latter is what calc_rc_params expects)
*/
if (is_navite_422_or_420)
bpp /= 2.0;
rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
switch (cm) {
case CM_420:
rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp - 5) * (3584)))));
rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
rc->second_line_bpg_offset = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
break;
case CM_422:
rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
rc->second_line_bpg_offset = 0;
break;
case CM_444:
case CM_RGB:
rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
rc->second_line_bpg_offset = 0;
break;
}
initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
if (cm == CM_422 || cm == CM_420)
slice_width /= 2;
padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
rc->initial_xmit_delay++;
}
rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
rc->flatness_det_thresh = 2 << (bpc - 8);
get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp);
get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp);
if (cm == CM_444 && minor_version == 1) {
for (i = 0; i < QP_SET_SIZE; ++i) {
rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
}
}
get_ofs_set(rc->ofs, cm, bpp);
/* fixed parameters */
rc->rc_model_size = 8192;
rc->rc_edge_factor = 6;
rc->rc_tgt_offset_hi = 3;
rc->rc_tgt_offset_lo = 3;
rc->rc_buf_thresh[0] = 896;
rc->rc_buf_thresh[1] = 1792;
rc->rc_buf_thresh[2] = 2688;
rc->rc_buf_thresh[3] = 3584;
rc->rc_buf_thresh[4] = 4480;
rc->rc_buf_thresh[5] = 5376;
rc->rc_buf_thresh[6] = 6272;
rc->rc_buf_thresh[7] = 6720;
rc->rc_buf_thresh[8] = 7168;
rc->rc_buf_thresh[9] = 7616;
rc->rc_buf_thresh[10] = 7744;
rc->rc_buf_thresh[11] = 7872;
rc->rc_buf_thresh[12] = 8000;
rc->rc_buf_thresh[13] = 8064;
}
static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp,
bool is_navite_422_or_420)
{
float bpp;
u32 bytes_per_pixel;
double d_bytes_per_pixel;
bpp = ((float)drm_bpp / 16.0);
d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
// TODO: Make sure the formula for calculating this is precise (ceiling
// vs. floor, and at what point they should be applied)
if (is_navite_422_or_420)
d_bytes_per_pixel /= 2;
bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
return bytes_per_pixel;
}
/**
* calc_rc_params - reads the user's cmdline mode

Просмотреть файл

@ -27,55 +27,7 @@
#ifndef __RC_CALC_H__
#define __RC_CALC_H__
#define QP_SET_SIZE 15
typedef int qp_set[QP_SET_SIZE];
struct rc_params {
int rc_quant_incr_limit0;
int rc_quant_incr_limit1;
int initial_fullness_offset;
int initial_xmit_delay;
int first_line_bpg_offset;
int second_line_bpg_offset;
int flatness_min_qp;
int flatness_max_qp;
int flatness_det_thresh;
qp_set qp_min;
qp_set qp_max;
qp_set ofs;
int rc_model_size;
int rc_edge_factor;
int rc_tgt_offset_hi;
int rc_tgt_offset_lo;
int rc_buf_thresh[QP_SET_SIZE - 1];
};
enum colour_mode {
CM_RGB, /* 444 RGB */
CM_444, /* 444 YUV or simple 422 */
CM_422, /* native 422 */
CM_420 /* native 420 */
};
enum bits_per_comp {
BPC_8 = 8,
BPC_10 = 10,
BPC_12 = 12
};
enum max_min {
DAL_MM_MIN = 0,
DAL_MM_MAX = 1
};
struct qp_entry {
float bpp;
const qp_set qps;
};
typedef struct qp_entry qp_table[];
#include "dml/dsc/rc_calc_fpu.h"
void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps);
u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps);

Просмотреть файл

@ -22,7 +22,6 @@
* Authors: AMD
*
*/
#include "os_types.h"
#include <drm/drm_dsc.h>
#include "dscc_types.h"
#include "rc_calc.h"