amd-drm-next-6.2-2022-12-02:

amdgpu:
- Fix CPU stalls when allocating large amounts of system memory
- SR-IOV fixes
- BACO fixes
- Enable GC 11.0.4
- Enable PSP 13.0.11
- Enable SMU 13.0.11
- Enable NBIO 7.7.1
- Fix reported VCN capabilities for RDNA2
- Misc cleanups
- PCI ref count fixes
- DCN DPIA fixes
- DCN 3.2.x fixes
- Documentation updates
- GC 11.x fixes
- VCN RAS fixes
- APU fix for passthrough
- PSR fixes
- GFX preemption support for gfx9
- SDMA fix for S0ix

amdkfd:
- Enable KFD support for GC 11.0.4
- Misc cleanups
- Fix memory leak

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221202160659.5987-1-alexander.deucher@amd.com
Dave Airlie 2022-12-09 12:08:12 +10:00
Parent f06a4da3ad 4670ac706f
Commit 9e5737bd04
67 changed files with 1697 additions and 364 deletions

View file

@ -30,12 +30,35 @@ we have a dedicated glossary for Display Core at
EOP
End Of Pipe/Pipeline
GART
Graphics Address Remapping Table. This is the name we use for the GPUVM
page table used by the GPU kernel driver. It remaps system resources
(memory or MMIO space) into the GPU's address space so the GPU can access
them. The name GART harkens back to the days of AGP when the platform
provided an MMU that the GPU could use to get a contiguous view of
scattered pages for DMA. The MMU has since moved on to the GPU, but the
name stuck.
GC
Graphics and Compute
GMC
Graphic Memory Controller
GPUVM
GPU Virtual Memory. This is the GPU's MMU. The GPU supports multiple
virtual address spaces that can be in flight at any given time. These
allow the GPU to remap VRAM and system resources into GPU virtual address
spaces for use by the GPU kernel driver and applications using the GPU.
These provide memory protection for different applications using the GPU.
GTT
Graphics Translation Tables. This is a memory pool managed through TTM
which provides access to system resources (memory or MMIO space) for
use by the GPU. These addresses can be mapped into the "GART" GPUVM page
table for use by the kernel driver or into per process GPUVM page tables
for application usage.
IH
Interrupt Handler
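
As a minimal sketch of how the GART and GTT terms above fit together (not taken from this patch; the helper name is hypothetical, while amdgpu_bo_create_kernel()/amdgpu_bo_free_kernel() are existing driver helpers): a buffer placed in the GTT domain is backed by scattered system pages, and pinning it maps those pages through the kernel GART page table so the GPU sees one contiguous address.

#include "amdgpu.h"
#include "amdgpu_object.h"

/* Hypothetical helper: kernel-owned buffer backed by system memory. */
static int example_map_pages_through_gart(struct amdgpu_device *adev)
{
	struct amdgpu_bo *bo;
	u64 gpu_addr;	/* contiguous address in the GART/GPUVM aperture */
	void *cpu_addr;	/* kernel CPU mapping of the same system pages */
	int r;

	/* Place the BO in the GTT domain; pinning maps it via the GART. */
	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &bo, &gpu_addr, &cpu_addr);
	if (r)
		return r;

	/* ... hand gpu_addr to the GPU, access cpu_addr from the CPU ... */

	amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_addr);
	return 0;
}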

View file

@ -148,10 +148,10 @@ PRIME Buffer Sharing
MMU Notifier
============
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
:doc: MMU Notifier
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
:internal:
AMDGPU Virtual Memory

View file

@ -3,7 +3,7 @@
==========================
The drm/amdgpu driver supports all AMD Radeon GPUs based on the Graphics Core
Next (GCN) architecture.
Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.
.. toctree::

View file

@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
amdgpu_ring_mux.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

View file

@ -2204,7 +2204,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
if (ret) {
kfree(mem);
kfree(*mem);
return ret;
}

View file

@ -104,7 +104,7 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
{
uint32_t start_addr, fw_size, drv_size;
u32 start_addr, fw_size, drv_size;
start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
@ -116,7 +116,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
drv_size);
if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
(u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
/* Firmware request VRAM reservation for SR-IOV */
adev->mman.fw_vram_usage_start_offset = (start_addr &
@ -133,7 +133,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
{
uint32_t fw_start_addr, fw_size, drv_start_addr, drv_size;
u32 fw_start_addr, fw_size, drv_start_addr, drv_size;
fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
@ -147,14 +147,18 @@ static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
drv_start_addr,
drv_size);
if ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
if (amdgpu_sriov_vf(adev) &&
((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
/* Firmware request VRAM reservation for SR-IOV */
adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
adev->mman.fw_vram_usage_size = fw_size << 10;
}
if ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
if (amdgpu_sriov_vf(adev) &&
((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
/* driver request VRAM reservation for SR-IOV */
adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
@ -172,8 +176,8 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
vram_usagebyfirmware);
struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
uint16_t data_offset;
uint8_t frev, crev;
u16 data_offset;
u8 frev, crev;
int usage_bytes = 0;
if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {

View file

@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
if (!found)
return false;
pci_dev_put(pdev);
adev->bios = kmalloc(size, GFP_KERNEL);
if (!adev->bios) {

View file

@ -2473,6 +2473,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) {
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
if (WARN_ON(!hive)) {
r = -ENOENT;
goto init_failed;
}
if (!hive->reset_domain ||
!amdgpu_reset_get_reset_domain(hive->reset_domain)) {
r = -ENOENT;

View file

@ -1512,6 +1512,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
default:
@ -1556,6 +1557,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
default:
@ -1641,6 +1643,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
break;
case IP_VERSION(13, 0, 4):
@ -1691,6 +1694,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
default:
@ -1804,6 +1808,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
default:
@ -1967,6 +1972,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
@ -2197,6 +2203,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->family = AMDGPU_FAMILY_GC_11_0_0;
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
adev->family = AMDGPU_FAMILY_GC_11_0_1;
break;
default:
@ -2214,6 +2221,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 7):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
adev->flags |= AMD_IS_APU;
break;
default:
@ -2270,6 +2278,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
break;
case IP_VERSION(7, 7, 0):
case IP_VERSION(7, 7, 1):
adev->nbio.funcs = &nbio_v7_7_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
break;

View file

@ -231,16 +231,18 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
/**
* DOC: gartsize (uint)
* Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic).
* Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing.
* The default is -1 (The size depends on asic).
*/
MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
/**
* DOC: gttsize (int)
* Restrict the size of GTT domain in MiB for testing. The default is -1 (Use 1/2 RAM, minimum value is 3GB).
* Restrict the size of GTT domain (for userspace use) in MiB for testing.
* The default is -1 (Use 1/2 RAM, minimum value is 3GB).
*/
MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
/**
@ -2569,6 +2571,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
amdgpu_device_baco_enter(drm_dev);
}
dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
return 0;
}

View file

@ -55,6 +55,7 @@ struct amdgpu_fence {
/* RB, DMA, etc. */
struct amdgpu_ring *ring;
ktime_t start_timestamp;
};
static struct kmem_cache *amdgpu_fence_slab;
@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
}
}
to_amdgpu_fence(fence)->start_timestamp = ktime_get();
/* This function can't be called concurrently anyway, otherwise
* emitting the fence would mess up the hardware ring buffer.
*/
@ -406,6 +409,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
return lower_32_bits(emitted);
}
/**
* amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
* @ring: ring the fence is associated with
*
* Find the earliest fence unsignaled until now, calculate the time delta
* between the time fence emitted and now.
*/
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct dma_fence *fence;
uint32_t last_seq, sync_seq;
last_seq = atomic_read(&ring->fence_drv.last_seq);
sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
if (last_seq == sync_seq)
return 0;
++last_seq;
last_seq &= drv->num_fences_mask;
fence = drv->fences[last_seq];
if (!fence)
return 0;
return ktime_us_delta(ktime_get(),
to_amdgpu_fence(fence)->start_timestamp);
}
/**
* amdgpu_fence_update_start_timestamp - update the timestamp of the fence
* @ring: ring the fence is associated with
* @seq: the fence seq number to update.
* @timestamp: the start timestamp to update.
*
* The function called at the time the fence and related ib is about to
* resubmit to gpu in MCBP scenario. Thus we do not consider race condition
* with amdgpu_fence_process to modify the same fence.
*/
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct dma_fence *fence;
seq &= drv->num_fences_mask;
fence = drv->fences[seq];
if (!fence)
return;
to_amdgpu_fence(fence)->start_timestamp = timestamp;
}
/**
* amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring.

View file

@ -33,6 +33,7 @@
#include "amdgpu_imu.h"
#include "soc15.h"
#include "amdgpu_ras.h"
#include "amdgpu_ring_mux.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@ -352,6 +353,9 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras *ras;
bool is_poweron;
struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux muxer;
};
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))

View file

@ -51,6 +51,8 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_hmm.h"
#define MAX_WALK_BYTE (2UL << 30)
/**
* amdgpu_hmm_invalidate_gfx - callback to notify about mm change
*
@ -163,6 +165,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct hmm_range **phmm_range)
{
struct hmm_range *hmm_range;
unsigned long end;
unsigned long timeout;
unsigned long i;
unsigned long *pfns;
@ -184,25 +187,42 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
hmm_range->hmm_pfns = pfns;
hmm_range->start = start;
hmm_range->end = start + npages * PAGE_SIZE;
end = start + npages * PAGE_SIZE;
hmm_range->dev_private_owner = owner;
/* Assuming 512MB takes maximum 1 second to fault page address */
timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
timeout = jiffies + msecs_to_jiffies(timeout);
do {
hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
hmm_range->start, hmm_range->end);
/* Assuming 512MB takes maximum 1 second to fault page address */
timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL);
timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
timeout = jiffies + msecs_to_jiffies(timeout);
retry:
hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
r = hmm_range_fault(hmm_range);
if (unlikely(r)) {
/*
* FIXME: This timeout should encompass the retry from
* mmu_interval_read_retry() as well.
*/
if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
}
hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
r = hmm_range_fault(hmm_range);
if (unlikely(r)) {
/*
* FIXME: This timeout should encompass the retry from
* mmu_interval_read_retry() as well.
*/
if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
}
if (hmm_range->end == end)
break;
hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
hmm_range->start = hmm_range->end;
schedule();
} while (hmm_range->end < end);
hmm_range->start = start;
hmm_range->hmm_pfns = pfns;
/*
* Due to default_flags, all pages are HMM_PFN_VALID or

View file

@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
}
amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
}

Просмотреть файл

@ -431,7 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
if (adev->vcn.harvest_config & (1 << i))
continue;
if (adev->vcn.inst[i].ring_dec.sched.ready)
@ -443,7 +443,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
if (adev->vcn.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->vcn.num_enc_rings; j++)

View file

@ -165,6 +165,7 @@ static int psp_early_init(void *handle)
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
psp_v13_0_set_psp_funcs(psp);
psp->autoload_supported = true;
break;
@ -512,26 +513,22 @@ static int psp_sw_fini(void *handle)
struct psp_gfx_cmd_resp *cmd = psp->cmd;
psp_memory_training_fini(psp);
if (psp->sos_fw) {
release_firmware(psp->sos_fw);
psp->sos_fw = NULL;
}
if (psp->asd_fw) {
release_firmware(psp->asd_fw);
psp->asd_fw = NULL;
}
if (psp->ta_fw) {
release_firmware(psp->ta_fw);
psp->ta_fw = NULL;
}
if (psp->cap_fw) {
release_firmware(psp->cap_fw);
psp->cap_fw = NULL;
}
if (psp->toc_fw) {
release_firmware(psp->toc_fw);
psp->toc_fw = NULL;
}
release_firmware(psp->sos_fw);
psp->sos_fw = NULL;
release_firmware(psp->asd_fw);
psp->asd_fw = NULL;
release_firmware(psp->ta_fw);
psp->ta_fw = NULL;
release_firmware(psp->cap_fw);
psp->cap_fw = NULL;
release_firmware(psp->toc_fw);
psp->toc_fw = NULL;
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7))
psp_sysfs_fini(adev);
@ -861,7 +858,7 @@ static int psp_tmr_unload(struct psp_context *psp)
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
psp_prep_tmr_unload_cmd_buf(psp, cmd);
dev_info(psp->adev->dev, "free PSP TMR buffer\n");
dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);

View file

@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, features_attr);
return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)

View file

@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
}
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
{
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_begin(ring);
}
void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
{
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_end(ring);
}

View file

@ -39,6 +39,7 @@ struct amdgpu_vm;
#define AMDGPU_MAX_RINGS 28
#define AMDGPU_MAX_HWIP_RINGS 8
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_SW_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
@ -59,6 +60,7 @@ enum amdgpu_ring_priority_level {
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
#define AMDGPU_FENCE_FLAG_EXEC (1 << 3)
#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
@ -143,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
ktime_t timestamp);
/*
* Rings.
*/
@ -279,6 +286,10 @@ struct amdgpu_ring {
bool is_mes_queue;
uint32_t hw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
bool is_sw_ring;
unsigned int entry_index;
};
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
@ -307,6 +318,9 @@ struct amdgpu_ring {
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);

View file

@ -0,0 +1,516 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/slab.h>
#include <drm/drm_print.h>
#include "amdgpu_ring_mux.h"
#include "amdgpu_ring.h"
#include "amdgpu.h"
#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
static const struct ring_info {
unsigned int hw_pio;
const char *ring_name;
} sw_ring_info[] = {
{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
{ AMDGPU_RING_PRIO_2, "gfx_high"},
};
static struct kmem_cache *amdgpu_mux_chunk_slab;
static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
struct amdgpu_ring *ring)
{
return ring->entry_index < mux->ring_entry_size ?
&mux->ring_entry[ring->entry_index] : NULL;
}
/* copy packages on sw ring range[begin, end) */
static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
struct amdgpu_ring *ring,
u64 s_start, u64 s_end)
{
u64 start, end;
struct amdgpu_ring *real_ring = mux->real_ring;
start = s_start & ring->buf_mask;
end = s_end & ring->buf_mask;
if (start == end) {
DRM_ERROR("no more data copied from sw ring\n");
return;
}
if (start > end) {
amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
(ring->ring_size >> 2) - start);
amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
} else {
amdgpu_ring_alloc(real_ring, end - start);
amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
}
}
static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
{
struct amdgpu_mux_entry *e = NULL;
struct amdgpu_mux_chunk *chunk;
uint32_t seq, last_seq;
int i;
/*find low priority entries:*/
if (!mux->s_resubmit)
return;
for (i = 0; i < mux->num_ring_entries; i++) {
if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
e = &mux->ring_entry[i];
break;
}
}
if (!e) {
DRM_ERROR("%s no low priority ring found\n", __func__);
return;
}
last_seq = atomic_read(&e->ring->fence_drv.last_seq);
seq = mux->seqno_to_resubmit;
if (last_seq < seq) {
/*resubmit all the fences between (last_seq, seq]*/
list_for_each_entry(chunk, &e->list, entry) {
if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
amdgpu_fence_update_start_timestamp(e->ring,
chunk->sync_seq,
ktime_get());
amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
chunk->start,
chunk->end);
mux->wptr_resubmit = chunk->end;
amdgpu_ring_commit(mux->real_ring);
}
}
}
del_timer(&mux->resubmit_timer);
mux->s_resubmit = false;
}
static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
{
mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
}
static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
{
struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
if (!spin_trylock(&mux->lock)) {
amdgpu_ring_mux_schedule_resubmit(mux);
DRM_ERROR("reschedule resubmit\n");
return;
}
amdgpu_mux_resubmit_chunks(mux);
spin_unlock(&mux->lock);
}
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
unsigned int entry_size)
{
mux->real_ring = ring;
mux->num_ring_entries = 0;
mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
if (!mux->ring_entry)
return -ENOMEM;
mux->ring_entry_size = entry_size;
mux->s_resubmit = false;
amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk",
sizeof(struct amdgpu_mux_chunk), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!amdgpu_mux_chunk_slab) {
DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
return -ENOMEM;
}
spin_lock_init(&mux->lock);
timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
return 0;
}
void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
{
struct amdgpu_mux_entry *e;
struct amdgpu_mux_chunk *chunk, *chunk2;
int i;
for (i = 0; i < mux->num_ring_entries; i++) {
e = &mux->ring_entry[i];
list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
list_del(&chunk->entry);
kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
}
}
kmem_cache_destroy(amdgpu_mux_chunk_slab);
kfree(mux->ring_entry);
mux->ring_entry = NULL;
mux->num_ring_entries = 0;
mux->ring_entry_size = 0;
}
int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
struct amdgpu_mux_entry *e;
if (mux->num_ring_entries >= mux->ring_entry_size) {
DRM_ERROR("add sw ring exceeding max entry size\n");
return -ENOENT;
}
e = &mux->ring_entry[mux->num_ring_entries];
ring->entry_index = mux->num_ring_entries;
e->ring = ring;
INIT_LIST_HEAD(&e->list);
mux->num_ring_entries += 1;
return 0;
}
void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
{
struct amdgpu_mux_entry *e;
spin_lock(&mux->lock);
if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
amdgpu_mux_resubmit_chunks(mux);
e = amdgpu_ring_mux_sw_entry(mux, ring);
if (!e) {
DRM_ERROR("cannot find entry for sw ring\n");
spin_unlock(&mux->lock);
return;
}
/* We could skip this set wptr as preemption in process. */
if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
spin_unlock(&mux->lock);
return;
}
e->sw_cptr = e->sw_wptr;
/* Update cptr if the package already copied in resubmit functions */
if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
e->sw_cptr = mux->wptr_resubmit;
e->sw_wptr = wptr;
e->start_ptr_in_hw_ring = mux->real_ring->wptr;
/* Skip copying for the packages already resubmitted.*/
if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
e->end_ptr_in_hw_ring = mux->real_ring->wptr;
amdgpu_ring_commit(mux->real_ring);
} else {
e->end_ptr_in_hw_ring = mux->real_ring->wptr;
}
spin_unlock(&mux->lock);
}
u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
struct amdgpu_mux_entry *e;
e = amdgpu_ring_mux_sw_entry(mux, ring);
if (!e) {
DRM_ERROR("cannot find entry for sw ring\n");
return 0;
}
return e->sw_wptr;
}
/**
* amdgpu_ring_mux_get_rptr - get the readptr of the software ring
* @mux: the multiplexer the software rings attach to
* @ring: the software ring of which we calculate the readptr
*
* The return value of the readptr is not precise while the other rings could
* write data onto the real ring buffer. After overwriting on the real ring, we
* cannot decide if our packages have been executed or not read yet. However,
* this function is only called by the tools such as umr to collect the latest
* packages for the hang analysis. We assume the hang happens near our latest
* submit. Thus we could use the following logic to give the clue:
* If the readptr is between start and end, then we return the copy pointer
* plus the distance from start to readptr. If the readptr is before start, we
* return the copy pointer. Lastly, if the readptr is past end, we return the
* write pointer.
*/
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
struct amdgpu_mux_entry *e;
u64 readp, offset, start, end;
e = amdgpu_ring_mux_sw_entry(mux, ring);
if (!e) {
DRM_ERROR("no sw entry found!\n");
return 0;
}
readp = amdgpu_ring_get_rptr(mux->real_ring);
start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
if (start > end) {
if (readp <= end)
readp += mux->real_ring->ring_size >> 2;
end += mux->real_ring->ring_size >> 2;
}
if (start <= readp && readp <= end) {
offset = readp - start;
e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
} else if (readp < start) {
e->sw_rptr = e->sw_cptr;
} else {
/* end < readptr */
e->sw_rptr = e->sw_wptr;
}
return e->sw_rptr;
}
u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
WARN_ON(!ring->is_sw_ring);
return amdgpu_ring_mux_get_rptr(mux, ring);
}
u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
WARN_ON(!ring->is_sw_ring);
return amdgpu_ring_mux_get_wptr(mux, ring);
}
void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
WARN_ON(!ring->is_sw_ring);
amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
}
/* Override insert_nop to prevent emitting nops to the software rings */
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
WARN_ON(!ring->is_sw_ring);
}
const char *amdgpu_sw_ring_name(int idx)
{
return idx < ARRAY_SIZE(sw_ring_info) ?
sw_ring_info[idx].ring_name : NULL;
}
unsigned int amdgpu_sw_ring_priority(int idx)
{
return idx < ARRAY_SIZE(sw_ring_info) ?
sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
}
/*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/
int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
{
struct amdgpu_ring *ring;
int i, need_preempt;
need_preempt = 0;
for (i = 0; i < mux->num_ring_entries; i++) {
ring = mux->ring_entry[i].ring;
if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
amdgpu_fence_count_emitted(ring) > 0)
return 0;
if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
amdgpu_fence_last_unsignaled_time_us(ring) >
AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
need_preempt = 1;
}
return need_preempt && !mux->s_resubmit;
}
/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */
int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
{
int r;
spin_lock(&mux->lock);
mux->pending_trailing_fence_signaled = true;
r = amdgpu_ring_preempt_ib(mux->real_ring);
spin_unlock(&mux->lock);
return r;
}
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
WARN_ON(!ring->is_sw_ring);
if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
if (amdgpu_mcbp_scan(mux) > 0)
amdgpu_mcbp_trigger_preempt(mux);
return;
}
amdgpu_ring_mux_start_ib(mux, ring);
}
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
WARN_ON(!ring->is_sw_ring);
if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
return;
amdgpu_ring_mux_end_ib(mux, ring);
}
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
struct amdgpu_mux_entry *e;
struct amdgpu_mux_chunk *chunk;
spin_lock(&mux->lock);
amdgpu_mux_resubmit_chunks(mux);
spin_unlock(&mux->lock);
e = amdgpu_ring_mux_sw_entry(mux, ring);
if (!e) {
DRM_ERROR("cannot find entry!\n");
return;
}
chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
if (!chunk) {
DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
return;
}
chunk->start = ring->wptr;
list_add_tail(&chunk->entry, &e->list);
}
static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
uint32_t last_seq, size = 0;
struct amdgpu_mux_entry *e;
struct amdgpu_mux_chunk *chunk, *tmp;
e = amdgpu_ring_mux_sw_entry(mux, ring);
if (!e) {
DRM_ERROR("cannot find entry!\n");
return;
}
last_seq = atomic_read(&ring->fence_drv.last_seq);
list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
if (chunk->sync_seq <= last_seq) {
list_del(&chunk->entry);
kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
} else {
size++;
}
}
}
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
struct amdgpu_mux_entry *e;
struct amdgpu_mux_chunk *chunk;
e = amdgpu_ring_mux_sw_entry(mux, ring);
if (!e) {
DRM_ERROR("cannot find entry!\n");
return;
}
chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
if (!chunk) {
DRM_ERROR("cannot find chunk!\n");
return;
}
chunk->end = ring->wptr;
chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
scan_and_remove_signaled_chunk(mux, ring);
}
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
{
struct amdgpu_mux_entry *e;
struct amdgpu_ring *ring = NULL;
int i;
if (!mux->pending_trailing_fence_signaled)
return false;
if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
return false;
for (i = 0; i < mux->num_ring_entries; i++) {
e = &mux->ring_entry[i];
if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
ring = e->ring;
break;
}
}
if (!ring) {
DRM_ERROR("cannot find low priority ring\n");
return false;
}
amdgpu_fence_process(ring);
if (amdgpu_fence_count_emitted(ring) > 0) {
mux->s_resubmit = true;
mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
amdgpu_ring_mux_schedule_resubmit(mux);
}
mux->pending_trailing_fence_signaled = false;
return true;
}

View file

@ -0,0 +1,103 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_RING_MUX__
#define __AMDGPU_RING_MUX__
#include <linux/timer.h>
#include <linux/spinlock.h>
#include "amdgpu_ring.h"
struct amdgpu_ring;
/**
* struct amdgpu_mux_entry - the entry recording software rings copying information.
* @ring: the pointer to the software ring.
* @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
* @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
* @sw_cptr: the position of the copy pointer in the sw ring.
* @sw_rptr: the read pointer in software ring.
* @sw_wptr: the write pointer in software ring.
* @list: list head for amdgpu_mux_chunk
*/
struct amdgpu_mux_entry {
struct amdgpu_ring *ring;
u64 start_ptr_in_hw_ring;
u64 end_ptr_in_hw_ring;
u64 sw_cptr;
u64 sw_rptr;
u64 sw_wptr;
struct list_head list;
};
struct amdgpu_ring_mux {
struct amdgpu_ring *real_ring;
struct amdgpu_mux_entry *ring_entry;
unsigned int num_ring_entries;
unsigned int ring_entry_size;
/*the lock for copy data from different software rings*/
spinlock_t lock;
bool s_resubmit;
uint32_t seqno_to_resubmit;
u64 wptr_resubmit;
struct timer_list resubmit_timer;
bool pending_trailing_fence_signaled;
};
/**
* struct amdgpu_mux_chunk - save the location of indirect buffer's package on software rings.
* @entry: the list entry.
* @sync_seq: the fence seqno related with the saved IB.
* @start: start location on the software ring.
* @end: end location on the software ring.
*/
struct amdgpu_mux_chunk {
struct list_head entry;
uint32_t sync_seq;
u64 start;
u64 end;
};
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
unsigned int entry_size);
void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
const char *amdgpu_sw_ring_name(int idx);
unsigned int amdgpu_sw_ring_priority(int idx);
#endif

View file

@ -1545,7 +1545,7 @@ static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
NULL,
NULL);
&adev->mman.drv_vram_usage_va);
}
/**
@ -1583,8 +1583,9 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
{
uint64_t vram_size = adev->gmc.visible_vram_size;
u64 vram_size = adev->gmc.visible_vram_size;
adev->mman.drv_vram_usage_va = NULL;
adev->mman.drv_vram_usage_reserved_bo = NULL;
if (adev->mman.drv_vram_usage_size == 0 ||
@ -1596,7 +1597,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
adev->mman.drv_vram_usage_size,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.drv_vram_usage_reserved_bo,
NULL);
&adev->mman.drv_vram_usage_va);
}
/*

View file

@ -90,6 +90,7 @@ struct amdgpu_mman {
u64 drv_vram_usage_start_offset;
u64 drv_vram_usage_size;
struct amdgpu_bo *drv_vram_usage_reserved_bo;
void *drv_vram_usage_va;
/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
struct amdgpu_bo *sdma_access_bo;

View file

@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
break;
case IP_VERSION(3, 0, 2):
fw_name = FIRMWARE_VANGOGH;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
case IP_VERSION(3, 0, 16):
fw_name = FIRMWARE_DIMGREY_CAVEFISH;

View file

@ -428,11 +428,17 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
struct eeprom_table_record bp;
uint64_t retired_page;
uint32_t bp_idx, bp_cnt;
void *vram_usage_va = NULL;
if (adev->mman.fw_vram_usage_va)
vram_usage_va = adev->mman.fw_vram_usage_va;
else
vram_usage_va = adev->mman.drv_vram_usage_va;
if (bp_block_size) {
bp_cnt = bp_block_size / sizeof(uint64_t);
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va +
retired_page = *(uint64_t *)(vram_usage_va +
bp_block_offset + bp_idx * sizeof(uint64_t));
bp.retired_page = retired_page;
@ -643,7 +649,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
if (adev->mman.fw_vram_usage_va != NULL) {
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
/* go through this logic in ip_init and reset to init workqueue*/
amdgpu_virt_exchange_data(adev);
@ -666,32 +674,40 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
uint32_t bp_block_size = 0;
struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
if (adev->mman.fw_vram_usage_va != NULL) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
if (adev->mman.fw_vram_usage_va) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
} else if (adev->mman.drv_vram_usage_va) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
}
amdgpu_virt_read_pf2vf_data(adev);
amdgpu_virt_write_vf2pf_data(adev);
/* bad page handling for version 2 */
if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
bp_block_size = pf2vf_v2->bp_block_size;
bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
bp_block_size = pf2vf_v2->bp_block_size;
if (bp_block_size && !adev->virt.ras_init_done)
amdgpu_virt_init_ras_err_handler_data(adev);
if (bp_block_size && !adev->virt.ras_init_done)
amdgpu_virt_init_ras_err_handler_data(adev);
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
}
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
}
}
}

View file

@ -45,22 +45,43 @@
/**
* DOC: GPUVM
*
* GPUVM is similar to the legacy gart on older asics, however
* rather than there being a single global gart table
* for the entire GPU, there are multiple VM page tables active
* at any given time. The VM page tables can contain a mix
* vram pages and system memory pages and system memory pages
* GPUVM is the MMU functionality provided on the GPU.
* GPUVM is similar to the legacy GART on older asics, however
* rather than there being a single global GART table
* for the entire GPU, there can be multiple GPUVM page tables active
* at any given time. The GPUVM page tables can contain a mix of
* VRAM pages and system pages (both memory and MMIO) and system pages
* can be mapped as snooped (cached system pages) or unsnooped
* (uncached system pages).
* Each VM has an ID associated with it and there is a page table
* associated with each VMID. When executing a command buffer,
* the kernel tells the ring what VMID to use for that command
*
* Each active GPUVM has an ID associated with it and there is a page table
* linked with each VMID. When executing a command buffer,
* the kernel tells the engine what VMID to use for that command
* buffer. VMIDs are allocated dynamically as commands are submitted.
* The userspace drivers maintain their own address space and the kernel
* sets up their pages tables accordingly when they submit their
* command buffers and a VMID is assigned.
* Cayman/Trinity support up to 8 active VMs at any given time;
* SI supports 16.
* The hardware supports up to 16 active GPUVMs at any given time.
*
* Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
* on the ASIC family. GPUVM supports RWX attributes on each page as well
* as other features such as encryption and caching attributes.
*
* VMID 0 is special. It is the GPUVM used for the kernel driver. In
* addition to an aperture managed by a page table, VMID 0 also has
* several other apertures. There is an aperture for direct access to VRAM
* and there is a legacy AGP aperture which just forwards accesses directly
* to the matching system physical addresses (or IOVAs when an IOMMU is
* present). These apertures provide direct access to these memories without
* incurring the overhead of a page table. VMID 0 is used by the kernel
* driver for tasks like memory management.
*
* GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
* For user applications, each application can have their own unique GPUVM
* address space. The application manages the address space and the kernel
* driver manages the GPUVM page tables for each process. If a GPU client
* accesses an invalid page, it will generate a GPU page fault, similar to
* accessing an invalid page on a CPU.
*/
#define START(node) ((node)->start)
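
As a rough sketch of the userspace side described above (not taken from this patch; the helper name is assumed, the caller must already hold an open render-node fd and a GEM handle, and the chosen VA must be page aligned and lie inside the range the driver reports): an application maps a buffer object at an address of its choosing in its own GPUVM with the GEM VA ioctl, and the kernel driver updates that process' page tables accordingly.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>	/* uapi header shipped with libdrm */

/* Hypothetical helper: map an existing GEM BO at a caller-chosen GPU
 * virtual address in the calling process' GPUVM. Returns the ioctl
 * result (0 on success, -1 with errno set otherwise).
 */
static int map_bo_into_gpuvm(int fd, uint32_t bo_handle,
			     uint64_t va_address, uint64_t size)
{
	struct drm_amdgpu_gem_va va = {0};

	va.handle       = bo_handle;		/* GEM handle of the BO */
	va.operation    = AMDGPU_VA_OP_MAP;	/* add a mapping */
	va.flags        = AMDGPU_VM_PAGE_READABLE |
			  AMDGPU_VM_PAGE_WRITEABLE |
			  AMDGPU_VM_PAGE_EXECUTABLE;
	va.va_address   = va_address;		/* app-chosen GPU VA */
	va.offset_in_bo = 0;
	va.map_size     = size;			/* page-aligned size */

	return ioctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va);
}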
@ -541,6 +562,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
return 0;
amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
@ -601,6 +623,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_emit_switch_buffer(ring);
}
amdgpu_ring_ib_end(ring);
return 0;
}

View file

@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
if (ret) {
dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
kobject_put(&hive->kobj);
kfree(hive);
hive = NULL;
goto pro_end;
}
@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
ret = -ENOMEM;
kobject_put(&hive->kobj);
kfree(hive);
hive = NULL;
goto pro_end;
}

View file

@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
soc15_program_register_sequence(adev,
golden_settings_gc_11_0_1,
(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
@ -855,6 +860,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@ -1284,6 +1290,7 @@ static int gfx_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
for (i = 0; i < adev->usec_timeout; i++) {
cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
bootload_status = RREG32_SOC15(GC, 0,
regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
else

View file

@ -47,6 +47,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"
@ -56,6 +57,7 @@
#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_NUM_SW_GFX_RINGS 2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@ -753,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
@ -826,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
if (action == PREEMPT_QUEUES_NO_UNMAP) {
amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
amdgpu_ring_write(kiq_ring, seq);
amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@ -2103,6 +2106,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
unsigned int hw_prio;
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@ -2186,6 +2190,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
/* disable scheduler on the real ring */
ring->no_scheduler = true;
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@ -2193,6 +2200,41 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
/* set up the software rings */
if (adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
ring = &adev->gfx.sw_gfx_ring[i];
ring->ring_obj = NULL;
sprintf(ring->name, amdgpu_sw_ring_name(i));
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
ring->is_sw_ring = true;
hw_prio = amdgpu_sw_ring_priority(i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
NULL);
if (r)
return r;
ring->wptr = 0;
}
/* init the muxer and add software rings */
r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
GFX9_NUM_SW_GFX_RINGS);
if (r) {
DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
return r;
}
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
&adev->gfx.sw_gfx_ring[i]);
if (r) {
DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
return r;
}
}
}
/* set up the compute queues - allocate horizontally across pipes */
ring_id = 0;
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
@ -2243,6 +2285,12 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
amdgpu_ring_mux_fini(&adev->gfx.muxer);
}
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
@ -5157,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
if (flags & AMDGPU_IB_PREEMPTED)
control |= INDIRECT_BUFFER_PRE_RESUME(1);
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
gfx_v9_0_ring_emit_de_meta(ring);
gfx_v9_0_ring_emit_de_meta(ring,
(!amdgpu_sriov_vf(ring->adev) &&
flags & AMDGPU_IB_PREEMPTED) ?
true : false);
}
amdgpu_ring_write(ring, header);
@ -5216,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
uint32_t dw2 = 0;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
EOP_TC_NC_ACTION_EN) :
(EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
EOP_TC_WB_ACTION_EN |
EOP_TC_MD_ACTION_EN)) |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
EVENT_INDEX(5)));
if (writeback) {
dw2 = EOP_TC_NC_ACTION_EN;
} else {
dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
EOP_TC_MD_ACTION_EN;
}
dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
EVENT_INDEX(5);
if (exec)
dw2 |= EOP_EXEC;
amdgpu_ring_write(ring, dw2);
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
/*
@ -5331,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0);
}
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
uint64_t csa_addr;
uint64_t offset, ce_payload_gpu_addr;
void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
csa_addr = amdgpu_csa_vaddr(ring->adev);
if (ring->is_mes_queue) {
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gfx_meta_data) +
offsetof(struct v9_gfx_meta_data, ce_payload);
ce_payload_gpu_addr =
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ce_payload_cpu_addr =
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
} else {
offset = offsetof(struct v9_gfx_meta_data, ce_payload);
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
if (resume)
amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
sizeof(ce_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
sizeof(ce_payload) >> 2);
}
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
return -ENOMEM;
}
/* assert preemption condition */
amdgpu_ring_set_preempt_cond_exec(ring, false);
ring->trail_seq += 1;
amdgpu_ring_alloc(ring, 13);
gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
/*reset the CP_VMID_PREEMPT after trailing fence*/
amdgpu_ring_emit_wreg(ring,
SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
0x0);
/* assert IB preemption, emit the trailing fence */
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
ring->trail_fence_gpu_addr,
ring->trail_seq);
amdgpu_ring_commit(kiq_ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
/* poll the trailing fence */
for (i = 0; i < adev->usec_timeout; i++) {
if (ring->trail_seq ==
le32_to_cpu(*ring->trail_fence_cpu_addr))
break;
udelay(1);
}
if (i >= adev->usec_timeout) {
r = -EINVAL;
DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
}
amdgpu_ring_commit(ring);
/* deassert preemption condition */
amdgpu_ring_set_preempt_cond_exec(ring, true);
return r;
}
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v9_de_ib_state de_payload = {0};
uint64_t csa_addr, gds_addr;
uint64_t offset, gds_addr, de_payload_gpu_addr;
void *de_payload_cpu_addr;
int cnt;
csa_addr = amdgpu_csa_vaddr(ring->adev);
gds_addr = csa_addr + 4096;
if (ring->is_mes_queue) {
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gfx_meta_data) +
offsetof(struct v9_gfx_meta_data, de_payload);
de_payload_gpu_addr =
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
de_payload_cpu_addr =
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gds_backup) +
offsetof(struct v9_gfx_meta_data, de_payload);
gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
} else {
offset = offsetof(struct v9_gfx_meta_data, de_payload);
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
}
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@ -5367,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
if (resume)
amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
sizeof(de_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&de_payload,
sizeof(de_payload) >> 2);
}
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@ -5385,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
uint32_t dw2 = 0;
if (amdgpu_sriov_vf(ring->adev))
gfx_v9_0_ring_emit_ce_meta(ring);
gfx_v9_0_ring_emit_ce_meta(ring,
(!amdgpu_sriov_vf(ring->adev) &&
flags & AMDGPU_IB_PREEMPTED) ? true : false);
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@ -5712,7 +5882,12 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
switch (me_id) {
case 0:
amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
if (adev->gfx.num_gfx_rings &&
!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
/* Fence signals are handled on the software rings*/
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
}
break;
case 1:
case 2:
@ -6709,6 +6884,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v9_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
.type = AMDGPU_RING_TYPE_GFX,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
.secure_submission_supported = true,
.vmhub = AMDGPU_GFXHUB_0,
.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
* the first COND_EXEC jump to the place just
* prior to this double SWITCH_BUFFER
*/
5 + /* COND_EXEC */
7 + /* HDP_flush */
4 + /* VGT_flush */
14 + /* CE_META */
31 + /* DE_META */
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
7, /* gfx_v9_0_emit_mem_sync */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
.emit_fence = gfx_v9_0_ring_emit_fence,
.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_sw_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v9_ring_emit_sb,
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
@ -6794,6 +7025,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
if (adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
}
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

View file

@ -759,6 +759,7 @@ static int gmc_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
adev->num_vmhubs = 2;
/*
* To fulfill 4-level page support,

View file

@ -388,6 +388,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
return mes_v11_0_submit_pkt_and_poll_completion(mes,

View file

@ -82,10 +82,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode =
/* Navi1x */
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@ -100,10 +100,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =
/* Sienna Cichlid */
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@ -125,10 +125,10 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@ -149,7 +149,7 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =
/* Beige Goby*/
static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@ -166,7 +166,7 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {
/* Yellow Carp*/
static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},

View file

@ -46,6 +46,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@ -102,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 11):
err = psp_init_toc_microcode(psp, chip_name);
if (err)
return err;

View file

@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
/**
* sdma_v4_0_gfx_stop - stop the gfx async dma engines
* sdma_v4_0_gfx_enable - enable the gfx async dma engines
*
* @adev: amdgpu_device pointer
*
* Stop the gfx async dma ring buffers (VEGA10).
* @enable: enable SDMA RB/IB
* control the gfx async dma ring buffers (VEGA10).
*/
static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
{
u32 rb_cntl, ib_cntl;
int i;
@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
}
}
@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
int i;
if (!enable) {
sdma_v4_0_gfx_stop(adev);
sdma_v4_0_gfx_enable(adev, enable);
sdma_v4_0_rlc_stop(adev);
if (adev->sdma.has_page_queue)
sdma_v4_0_page_stop(adev);
@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SMU saves SDMA state for us */
if (adev->in_s0ix)
if (adev->in_s0ix) {
sdma_v4_0_gfx_enable(adev, false);
return 0;
}
return sdma_v4_0_hw_fini(adev);
}
@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SMU restores SDMA state for us */
if (adev->in_s0ix)
if (adev->in_s0ix) {
sdma_v4_0_enable(adev, true);
sdma_v4_0_gfx_enable(adev, true);
amdgpu_ttm_set_buffer_funcs_status(adev, true);
return 0;
}
return sdma_v4_0_hw_init(adev);
}

View file

@ -103,10 +103,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
/* Vega */
static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
};
@ -120,10 +120,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
/* Raven */
static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
@ -138,10 +138,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
/* Renoir, Arcturus */
static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},

View file

@ -162,6 +162,7 @@
* 2 - Bypass
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
#define PACKET3_COPY_DATA 0x40
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_COND_WRITE 0x45
@ -184,6 +185,7 @@
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
#define EOP_TC_NC_ACTION_EN (1 << 19)
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard

View file

@ -43,6 +43,7 @@
#include "soc15.h"
#include "soc15_common.h"
#include "soc21.h"
#include "mxgpu_nv.h"
static const struct amd_ip_funcs soc21_common_ip_funcs;
@ -61,7 +62,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode =
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@ -325,6 +326,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
@ -644,24 +646,45 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x20;
break;
case IP_VERSION(11, 0, 4):
adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x1;
break;
default:
/* FIXME: not supported yet */
return -EINVAL;
}
if (amdgpu_sriov_vf(adev))
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_init_setting(adev);
xgpu_nv_mailbox_set_irq_funcs(adev);
}
return 0;
}
static int soc21_common_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_get_irq(adev);
return 0;
}
static int soc21_common_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
return 0;
}
@ -699,6 +722,9 @@ static int soc21_common_hw_fini(void *handle)
/* disable the doorbell aperture */
soc21_enable_doorbell_aperture(adev, false);
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_put_irq(adev);
return 0;
}

View file

@ -862,6 +862,28 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
return;
}
static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
bool indirect)
{
uint32_t tmp;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
return;
tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
WREG32_SOC15_DPG_MODE(inst_idx,
SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
tmp, 0, indirect);
tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
WREG32_SOC15_DPG_MODE(inst_idx,
SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
tmp, 0, indirect);
}
/**
* vcn_v4_0_start_dpg_mode - VCN start with dpg mode
*
@ -950,6 +972,8 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
vcn_v4_0_enable_ras(adev, inst_idx, indirect);
/* enable master interrupt */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_MASTINT_EN),

View file

@ -1521,6 +1521,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
break;

View file

@ -153,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
break;
default:
@ -394,6 +395,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v11_kfd2kgd;
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
gfx_target_version = 110003;
f2g = &gfx_v11_kfd2kgd;
break;

View file

@ -2012,10 +2012,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
kfd_debug_print_topology();
if (!res)
kfd_notify_gpu_change(gpu_id, 1);
kfd_notify_gpu_change(gpu_id, 1);
return res;
return 0;
}
/**

View file

@ -146,14 +146,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
/* Number of bytes in PSP footer for firmware. */
#define PSP_FOOTER_BYTES 0x100
/*
* DMUB Async to Sync Mechanism Status
*/
#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4
/**
* DOC: overview
*
@ -1104,7 +1096,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
/* Initialize hardware. */
memset(&hw_params, 0, sizeof(hw_params));
hw_params.fb_base = adev->gmc.fb_start;
hw_params.fb_offset = adev->gmc.aper_base;
hw_params.fb_offset = adev->vm_manager.vram_base_offset;
/* backdoor load firmware and trigger dmub running */
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
@ -1226,7 +1218,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24;
pa_config->system_aperture.fb_base = adev->gmc.fb_start;
pa_config->system_aperture.fb_offset = adev->gmc.aper_base;
pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset;
pa_config->system_aperture.fb_top = adev->gmc.fb_end;
pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12;
@ -1441,6 +1433,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
memset(&init_params, 0, sizeof(init_params));
#endif
mutex_init(&adev->dm.dpia_aux_lock);
mutex_init(&adev->dm.dc_lock);
mutex_init(&adev->dm.audio_lock);
@ -1805,6 +1798,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
mutex_destroy(&adev->dm.audio_lock);
mutex_destroy(&adev->dm.dc_lock);
mutex_destroy(&adev->dm.dpia_aux_lock);
return;
}
@ -4875,6 +4869,35 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
return 0;
}
static inline void fill_dc_dirty_rect(struct drm_plane *plane,
struct rect *dirty_rect, int32_t x,
int32_t y, int32_t width, int32_t height,
int *i, bool ffu)
{
if (*i > DC_MAX_DIRTY_RECTS)
return;
if (*i == DC_MAX_DIRTY_RECTS)
goto out;
dirty_rect->x = x;
dirty_rect->y = y;
dirty_rect->width = width;
dirty_rect->height = height;
if (ffu)
drm_dbg(plane->dev,
"[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
plane->base.id, width, height);
else
drm_dbg(plane->dev,
"[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)",
plane->base.id, x, y, width, height);
out:
(*i)++;
}
/**
* fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates
*
@ -4895,10 +4918,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
* addition, certain use cases - such as cursor and multi-plane overlay (MPO) -
* implicitly provide damage clips without any client support via the plane
* bounds.
*
* Today, amdgpu_dm only supports the MPO and cursor usecase.
*
* TODO: Also enable for FB_DAMAGE_CLIPS
*/
static void fill_dc_dirty_rects(struct drm_plane *plane,
struct drm_plane_state *old_plane_state,
@ -4909,12 +4928,11 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
struct rect *dirty_rects = flip_addrs->dirty_rects;
uint32_t num_clips;
struct drm_mode_rect *clips;
bool bb_changed;
bool fb_changed;
uint32_t i = 0;
flip_addrs->dirty_rect_count = 0;
/*
* Cursor plane has it's own dirty rect update interface. See
* dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data
@ -4922,20 +4940,20 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
if (plane->type == DRM_PLANE_TYPE_CURSOR)
return;
/*
* Today, we only consider MPO use-case for PSR SU. If MPO not
* requested, and there is a plane update, do FFU.
*/
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
clips = drm_plane_get_damage_clips(new_plane_state);
if (!dm_crtc_state->mpo_requested) {
dirty_rects[0].x = 0;
dirty_rects[0].y = 0;
dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay;
dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay;
flip_addrs->dirty_rect_count = 1;
DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
new_plane_state->plane->base.id,
dm_crtc_state->base.mode.crtc_hdisplay,
dm_crtc_state->base.mode.crtc_vdisplay);
if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
goto ffu;
for (; flip_addrs->dirty_rect_count < num_clips; clips++)
fill_dc_dirty_rect(new_plane_state->plane,
&dirty_rects[i], clips->x1,
clips->y1, clips->x2 - clips->x1,
clips->y2 - clips->y1,
&flip_addrs->dirty_rect_count,
false);
return;
}
@ -4946,7 +4964,6 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
* If plane is moved or resized, also add old bounding box to dirty
* rects.
*/
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
fb_changed = old_plane_state->fb->base.id !=
new_plane_state->fb->base.id;
bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x ||
@ -4954,36 +4971,51 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
old_plane_state->crtc_w != new_plane_state->crtc_w ||
old_plane_state->crtc_h != new_plane_state->crtc_h);
DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
new_plane_state->plane->base.id,
bb_changed, fb_changed, num_clips);
drm_dbg(plane->dev,
"[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
new_plane_state->plane->base.id,
bb_changed, fb_changed, num_clips);
if (num_clips || fb_changed || bb_changed) {
dirty_rects[i].x = new_plane_state->crtc_x;
dirty_rects[i].y = new_plane_state->crtc_y;
dirty_rects[i].width = new_plane_state->crtc_w;
dirty_rects[i].height = new_plane_state->crtc_h;
DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
new_plane_state->plane->base.id,
dirty_rects[i].x, dirty_rects[i].y,
dirty_rects[i].width, dirty_rects[i].height);
i += 1;
}
/* Add old plane bounding-box if plane is moved or resized */
if (bb_changed) {
dirty_rects[i].x = old_plane_state->crtc_x;
dirty_rects[i].y = old_plane_state->crtc_y;
dirty_rects[i].width = old_plane_state->crtc_w;
dirty_rects[i].height = old_plane_state->crtc_h;
DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
old_plane_state->plane->base.id,
dirty_rects[i].x, dirty_rects[i].y,
dirty_rects[i].width, dirty_rects[i].height);
i += 1;
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
new_plane_state->crtc_x,
new_plane_state->crtc_y,
new_plane_state->crtc_w,
new_plane_state->crtc_h, &i, false);
/* Add old plane bounding-box if plane is moved or resized */
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
old_plane_state->crtc_x,
old_plane_state->crtc_y,
old_plane_state->crtc_w,
old_plane_state->crtc_h, &i, false);
}
if (num_clips) {
for (; i < num_clips; clips++)
fill_dc_dirty_rect(new_plane_state->plane,
&dirty_rects[i], clips->x1,
clips->y1, clips->x2 - clips->x1,
clips->y2 - clips->y1, &i, false);
} else if (fb_changed && !bb_changed) {
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
new_plane_state->crtc_x,
new_plane_state->crtc_y,
new_plane_state->crtc_w,
new_plane_state->crtc_h, &i, false);
}
if (i > DC_MAX_DIRTY_RECTS)
goto ffu;
flip_addrs->dirty_rect_count = i;
return;
ffu:
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0,
dm_crtc_state->base.mode.crtc_hdisplay,
dm_crtc_state->base.mode.crtc_vdisplay,
&flip_addrs->dirty_rect_count, true);
}
static void update_stream_scaling_settings(const struct drm_display_mode *mode,
@ -10204,91 +10236,92 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
return value;
}
static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux,
struct dc_context *ctx,
uint8_t status_type,
uint32_t *operation_result)
int amdgpu_dm_process_dmub_aux_transfer_sync(
struct dc_context *ctx,
unsigned int link_index,
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
int return_status = -1;
struct dmub_notification *p_notify = adev->dm.dmub_notify;
int ret = -1;
if (is_cmd_aux) {
if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
return_status = p_notify->aux_reply.length;
*operation_result = p_notify->result;
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
*operation_result = AUX_RET_ERROR_TIMEOUT;
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) {
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
} else {
*operation_result = AUX_RET_ERROR_UNKNOWN;
}
} else {
if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
return_status = 0;
*operation_result = p_notify->sc_status;
} else {
*operation_result = SET_CONFIG_UNKNOWN_ERROR;
}
mutex_lock(&adev->dm.dpia_aux_lock);
if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
goto out;
}
if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
DRM_ERROR("wait_for_completion_timeout timeout!");
*operation_result = AUX_RET_ERROR_TIMEOUT;
goto out;
}
return return_status;
if (p_notify->result != AUX_RET_SUCCESS) {
/*
* Transient states before tunneling is enabled could
* lead to this error. We can ignore this for now.
*/
if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
payload->address, payload->length,
p_notify->result);
}
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
goto out;
}
payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
if (!payload->write && p_notify->aux_reply.length &&
(payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
if (payload->length != p_notify->aux_reply.length) {
DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
p_notify->aux_reply.length,
payload->address, payload->length);
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
goto out;
}
memcpy(payload->data, p_notify->aux_reply.data,
p_notify->aux_reply.length);
}
/* success */
ret = p_notify->aux_reply.length;
*operation_result = p_notify->result;
out:
mutex_unlock(&adev->dm.dpia_aux_lock);
return ret;
}
int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
unsigned int link_index, void *cmd_payload, void *operation_result)
int amdgpu_dm_process_dmub_set_config_sync(
struct dc_context *ctx,
unsigned int link_index,
struct set_config_cmd_payload *payload,
enum set_config_status *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
int ret = 0;
bool is_cmd_complete;
int ret;
if (is_cmd_aux) {
dc_process_dmub_aux_transfer_async(ctx->dc,
link_index, (struct aux_payload *)cmd_payload);
} else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
(struct set_config_cmd_payload *)cmd_payload,
adev->dm.dmub_notify)) {
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
(uint32_t *)operation_result);
}
mutex_lock(&adev->dm.dpia_aux_lock);
is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
link_index, payload, adev->dm.dmub_notify);
ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
if (ret == 0) {
if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
ret = 0;
*operation_result = adev->dm.dmub_notify->sc_status;
} else {
DRM_ERROR("wait_for_completion_timeout timeout!");
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
(uint32_t *)operation_result);
ret = -1;
*operation_result = SET_CONFIG_UNKNOWN_ERROR;
}
if (is_cmd_aux) {
if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
struct aux_payload *payload = (struct aux_payload *)cmd_payload;
payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
if (payload->length != adev->dm.dmub_notify->aux_reply.length) {
DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n",
payload->address, payload->length,
adev->dm.dmub_notify->aux_reply.length);
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx,
DMUB_ASYNC_TO_SYNC_ACCESS_INVALID,
(uint32_t *)operation_result);
}
memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
adev->dm.dmub_notify->aux_reply.length);
}
}
}
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
(uint32_t *)operation_result);
mutex_unlock(&adev->dm.dpia_aux_lock);
return ret;
}
/*

View file

@ -59,7 +59,9 @@
#include "signal_types.h"
#include "amdgpu_dm_crc.h"
struct aux_payload;
struct set_config_cmd_payload;
enum aux_return_code_type;
enum set_config_status;
/* Forward declarations */
struct amdgpu_device;
@ -542,6 +544,13 @@ struct amdgpu_display_manager {
* occurred on certain intel platform
*/
bool aux_hpd_discon_quirk;
/**
* @dpia_aux_lock:
*
* Guards access to DPIA AUX
*/
struct mutex dpia_aux_lock;
};
enum dsc_clock_force_state {
@ -785,9 +794,11 @@ void amdgpu_dm_update_connector_after_detect(
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
struct dc_context *ctx, unsigned int link_index,
void *payload, void *operation_result);
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
struct aux_payload *payload, enum aux_return_code_type *operation_result);
int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
bool check_seamless_boot_capability(struct amdgpu_device *adev);

View file

@ -2639,6 +2639,25 @@ static int dp_mst_progress_status_show(struct seq_file *m, void *unused)
return 0;
}
/*
* Reports whether the connected display is a USB4 DPIA tunneled display
* Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link
*/
static int is_dpia_link_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct dc_link *link = aconnector->dc_link;
if (connector->status != connector_status_connected)
return -ENODEV;
seq_printf(m, "%s\n", (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? "yes" :
(link->ep_type == DISPLAY_ENDPOINT_PHY) ? "no" : "unknown");
return 0;
}
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@ -2650,6 +2669,7 @@ DEFINE_SHOW_ATTRIBUTE(internal_display);
DEFINE_SHOW_ATTRIBUTE(psr_capability);
DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
DEFINE_SHOW_ATTRIBUTE(is_dpia_link);
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@ -2794,7 +2814,8 @@ static const struct {
{"max_bpc", &dp_max_bpc_debugfs_fops},
{"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops},
{"is_mst_connector", &dp_is_mst_connector_fops},
{"mst_progress_status", &dp_mst_progress_status_fops}
{"mst_progress_status", &dp_mst_progress_status_fops},
{"is_dpia_link", &is_dpia_link_fops}
};
#ifdef CONFIG_DRM_AMD_DC_HDCP

View file

@ -817,9 +817,8 @@ int dm_helper_dmub_aux_transfer_sync(
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
link->link_index, (void *)payload,
(void *)operation_result);
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
operation_result);
}
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
@ -827,9 +826,8 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
struct set_config_cmd_payload *payload,
enum set_config_status *operation_result)
{
return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
link->link_index, (void *)payload,
(void *)operation_result);
return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
operation_result);
}
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)

View file

@ -1600,6 +1600,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
supported_rotations);
if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) &&
plane->type != DRM_PLANE_TYPE_CURSOR)
drm_plane_enable_fb_damage_clips(plane);
drm_plane_helper_add(plane, &dm_plane_helper_funcs);
#ifdef CONFIG_DRM_AMD_DC_HDR

View file

@ -438,7 +438,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (!new_clocks->dtbclk_en) {
new_clocks->ref_dtbclk_khz = 0;
new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
}
/* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */

View file

@ -3061,7 +3061,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
* Ensures that we have enough pipes for newly added MPO planes
*/
if (dc->res_pool->funcs->remove_phantom_pipes)
dc->res_pool->funcs->remove_phantom_pipes(dc, context);
dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
/*remove old surfaces from context */
if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
@ -3954,6 +3954,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
struct dc_state *context;
enum surface_update_type update_type;
int i;
struct mall_temp_config mall_temp_config;
/* In cases where MPO and split or ODM are used transitions can
* cause underflow. Apply stream configuration with minimal pipe
@ -3985,11 +3986,29 @@ bool dc_update_planes_and_stream(struct dc *dc,
/* on plane removal, minimal state is the new one */
if (force_minimal_pipe_splitting && !is_plane_addition) {
/* Since all phantom pipes are removed in full validation,
* we have to save and restore the subvp/mall config when
* we do a minimal transition since the flags marking the
* pipe as subvp/phantom will be cleared (dc copy constructor
* creates a shallow copy).
*/
if (dc->res_pool->funcs->save_mall_state)
dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
if (!commit_minimal_transition_state(dc, context)) {
dc_release_state(context);
return false;
}
if (dc->res_pool->funcs->restore_mall_state)
dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
/* If we do a minimal transition with plane removal and the context
* has subvp we also have to retain back the phantom stream / planes
* since the refcount is decremented as part of the min transition
* (we commit a state with no subvp, so the phantom streams / planes
* had to be removed).
*/
if (dc->res_pool->funcs->retain_phantom_pipes)
dc->res_pool->funcs->retain_phantom_pipes(dc, context);
update_type = UPDATE_TYPE_FULL;
}

View file

@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
#define DC_VER "3.2.213"
#define DC_VER "3.2.214"
#define MAX_SURFACES 3
#define MAX_PLANES 6

View file

@ -160,6 +160,17 @@ struct mall_stream_config {
struct dc_stream_state *paired_stream; // master / slave stream
};
/* Temp struct used to save and restore MALL config
* during validation.
*
* TODO: Move MALL config into dc_state instead of stream struct
* to avoid needing to save/restore.
*/
struct mall_temp_config {
struct mall_stream_config mall_stream_config[MAX_PIPES];
bool is_phantom_plane[MAX_PIPES];
};
struct dc_stream_state {
// sink is deprecated, new code should not reference
// this pointer

View file

@ -869,6 +869,32 @@ static void false_optc_underflow_wa(
tg->funcs->clear_optc_underflow(tg);
}
static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
{
struct pipe_ctx *other_pipe;
int vready_offset = pipe->pipe_dlg_param.vready_offset;
/* Always use the largest vready_offset of all connected pipes */
for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
return vready_offset;
}
enum dc_status dcn10_enable_stream_timing(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
@ -912,7 +938,7 @@ enum dc_status dcn10_enable_stream_timing(
pipe_ctx->stream_res.tg->funcs->program_timing(
pipe_ctx->stream_res.tg,
&stream->timing,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width,
@ -2908,7 +2934,7 @@ void dcn10_program_pipe(
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);

View file

@ -1652,6 +1652,31 @@ static void dcn20_update_dchubp_dpp(
hubp->funcs->phantom_hubp_post_enable(hubp);
}
static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
{
struct pipe_ctx *other_pipe;
int vready_offset = pipe->pipe_dlg_param.vready_offset;
/* Always use the largest vready_offset of all connected pipes */
for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
return vready_offset;
}
static void dcn20_program_pipe(
struct dc *dc,
@ -1670,7 +1695,7 @@ static void dcn20_program_pipe(
&& !pipe_ctx->prev_odm_pipe) {
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);
@ -1716,7 +1741,10 @@ static void dcn20_program_pipe(
* only do gamma programming for powering on, internal memcmp to avoid
* updating on slave planes
*/
if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf)
if (pipe_ctx->update_flags.bits.enable ||
pipe_ctx->update_flags.bits.plane_changed ||
pipe_ctx->stream->update_flags.bits.out_tf ||
pipe_ctx->plane_state->update_flags.bits.output_tf_change)
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
/* If the pipe has been enabled or has a different opp, we
@ -2067,7 +2095,7 @@ bool dcn20_update_bandwidth(
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);

View file

@ -225,11 +225,7 @@ static void dccg32_set_dtbclk_dto(
} else {
REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
DTBCLK_DTO_ENABLE[params->otg_inst], 0,
PIPE_DTO_SRC_SEL[params->otg_inst], 1);
if (params->is_hdmi)
REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
PIPE_DTO_SRC_SEL[params->otg_inst], 0);
PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
}

View file

@ -262,11 +262,11 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
/* For DCC:
* meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1)
/*For DCC:
* meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
*/
if (pipe->plane_state->dcc.enable)
num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel +
num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
@ -316,8 +316,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) /
dc->caps.cache_line_size + 2;
break;
}
break;
}
}

View file

@ -1743,7 +1743,7 @@ void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
}
// return true if removed piped from ctx, false otherwise
bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
{
int i;
bool removed_pipe = false;
@ -1770,14 +1770,23 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
removed_pipe = true;
}
// Clear all phantom stream info
if (pipe->stream) {
pipe->stream->mall_stream_config.type = SUBVP_NONE;
pipe->stream->mall_stream_config.paired_stream = NULL;
}
/* For non-full updates, a shallow copy of the current state
* is created. In this case we don't want to erase the current
* state (there can be 2 HIRQL threads, one in flip, and one in
* checkMPO) that can cause a race condition.
*
* This is just a workaround, needs a proper fix.
*/
if (!fast_update) {
// Clear all phantom stream info
if (pipe->stream) {
pipe->stream->mall_stream_config.type = SUBVP_NONE;
pipe->stream->mall_stream_config.paired_stream = NULL;
}
if (pipe->plane_state) {
pipe->plane_state->is_phantom = false;
if (pipe->plane_state) {
pipe->plane_state->is_phantom = false;
}
}
}
return removed_pipe;
@ -1950,23 +1959,28 @@ int dcn32_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;
switch (pipe->stream->mall_stream_config.type) {
case SUBVP_MAIN:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
subvp_in_use = true;
break;
case SUBVP_PHANTOM:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
// Disallow unbounded req for SubVP according to DCHUB programming guide
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
break;
case SUBVP_NONE:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
break;
default:
break;
/* Only populate DML input with subvp info for full updates.
* This is just a workaround -- needs a proper fix.
*/
if (!fast_validate) {
switch (pipe->stream->mall_stream_config.type) {
case SUBVP_MAIN:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
subvp_in_use = true;
break;
case SUBVP_PHANTOM:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
// Disallow unbounded req for SubVP according to DCHUB programming guide
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
break;
case SUBVP_NONE:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
break;
default:
break;
}
}
pipes[pipe_cnt].dout.dsc_input_bpc = 0;
@ -2055,6 +2069,8 @@ static struct resource_funcs dcn32_res_pool_funcs = {
.add_phantom_pipes = dcn32_add_phantom_pipes,
.remove_phantom_pipes = dcn32_remove_phantom_pipes,
.retain_phantom_pipes = dcn32_retain_phantom_pipes,
.save_mall_state = dcn32_save_mall_state,
.restore_mall_state = dcn32_restore_mall_state,
};

View file

@ -45,17 +45,6 @@
extern struct _vcs_dpi_ip_params_st dcn3_2_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc;
/* Temp struct used to save and restore MALL config
* during validation.
*
* TODO: Move MALL config into dc_state instead of stream struct
* to avoid needing to save/restore.
*/
struct mall_temp_config {
struct mall_stream_config mall_stream_config[MAX_PIPES];
bool is_phantom_plane[MAX_PIPES];
};
struct dcn32_resource_pool {
struct resource_pool base;
};
@ -81,7 +70,7 @@ bool dcn32_release_post_bldn_3dlut(
struct dc_transfer_func **shaper);
bool dcn32_remove_phantom_pipes(struct dc *dc,
struct dc_state *context);
struct dc_state *context, bool fast_update);
void dcn32_retain_phantom_pipes(struct dc *dc,
struct dc_state *context);

View file

@ -97,14 +97,14 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
* FLOOR(vp_x_start, blk_width)
*/
full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
/* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
* FLOOR(vp_y_start, blk_height)
*/
full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
full_vp_height + mblk_height - 1) / mblk_height * mblk_height) +
full_vp_height + mblk_height - 1) / mblk_height * mblk_height) -
(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
/* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
@ -121,14 +121,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
*/
num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
/*For DCC:
* meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
*/
if (pipe->plane_state->dcc.enable)
num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
// cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
// (MALL is 64-byte aligned)
cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
/* For DCC divide by 256 */
if (pipe->plane_state->dcc.enable)
cache_lines_per_plane = cache_lines_per_plane + (cache_lines_per_plane / 256) + 1;
cache_lines_used += cache_lines_per_plane;
}
}

View file

@ -1622,6 +1622,8 @@ static struct resource_funcs dcn321_res_pool_funcs = {
.add_phantom_pipes = dcn32_add_phantom_pipes,
.remove_phantom_pipes = dcn32_remove_phantom_pipes,
.retain_phantom_pipes = dcn32_retain_phantom_pipes,
.save_mall_state = dcn32_save_mall_state,
.restore_mall_state = dcn32_restore_mall_state,
};

View file

@ -559,6 +559,9 @@ void dcn31_calculate_wm_and_dlg_fp(
context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].stream)
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
}
}

View file

@ -1203,7 +1203,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
// remove phantom pipes and repopulate dml pipes
if (!found_supported_config) {
dc->res_pool->funcs->remove_phantom_pipes(dc, context);
dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
@ -1320,7 +1320,10 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
if (context->res_ctx.pipe_ctx[i].plane_state)
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
else
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
pipe_idx++;
}
@ -1515,7 +1518,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
return false;
// For each full update, remove all existing phantom pipes first
dc->res_pool->funcs->remove_phantom_pipes(dc, context);
dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);

View file

@ -240,9 +240,11 @@ struct resource_funcs {
unsigned int pipe_cnt,
unsigned int index);
bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context);
bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
};
struct audio_support{

View file

@ -222,7 +222,11 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
uint32_t second_gfx_pipe_enabled : 1;
uint32_t enable_level_process_quantum_check : 1;
uint32_t reserved : 25;
uint32_t legacy_sch_mode : 1;
uint32_t disable_add_queue_wptr_mc_addr : 1;
uint32_t enable_mes_event_int_logging : 1;
uint32_t enable_reg_active_poll : 1;
uint32_t reserved : 21;
};
uint32_t uint32_t_all;
};

View file

@ -161,7 +161,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu,
int smu_set_gfx_power_up_by_imu(struct smu_context *smu)
{
if (!smu->ppt_funcs && !smu->ppt_funcs->set_gfx_power_up_by_imu)
if (!smu->ppt_funcs || !smu->ppt_funcs->set_gfx_power_up_by_imu)
return -EOPNOTSUPP;
return smu->ppt_funcs->set_gfx_power_up_by_imu(smu);
@ -585,6 +585,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
yellow_carp_set_ppt_funcs(smu);
break;
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
smu_v13_0_4_set_ppt_funcs(smu);
break;
case IP_VERSION(13, 0, 5):

View file

@ -79,6 +79,17 @@ MODULE_FIRMWARE("amdgpu/beige_goby_smc.bin");
#define mmTHM_BACO_CNTL_ARCT 0xA7
#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0
static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
uint32_t data, loop = 0;
do {
usleep_range(1000, 1100);
data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
} while ((data & 0x100) && (++loop < 100));
}
int smu_v11_0_init_microcode(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
@ -1588,6 +1599,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu)
if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
return false;
/* return true if ASIC is in BACO state already */
if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
return true;
/* Arcturus does not support this bit mask */
if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
@ -1685,7 +1700,18 @@ int smu_v11_0_baco_enter(struct smu_context *smu)
int smu_v11_0_baco_exit(struct smu_context *smu)
{
return smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
int ret;
ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
if (!ret) {
/*
* Poll BACO exit status to ensure FW has completed
* BACO exit process to avoid timing issues.
*/
smu_v11_0_poll_baco_exit(smu);
}
return ret;
}
int smu_v11_0_mode1_reset(struct smu_context *smu)

View file

@ -250,6 +250,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
mp1_fw_flags = RREG32_PCIE(MP1_Public |
(smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff));
break;
@ -301,6 +302,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP;
break;
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4;
break;
case IP_VERSION(13, 0, 5):
@ -841,6 +843,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable)
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return 0;
if (enable)

View file

@ -1026,6 +1026,15 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
.set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu,
};
static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
}
void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
@ -1035,7 +1044,9 @@ void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
smu->feature_map = smu_v13_0_4_feature_mask_map;
smu->table_map = smu_v13_0_4_table_map;
smu->is_apu = true;
smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4))
smu_v13_0_4_set_smu_mailbox_registers(smu);
else
smu_v13_0_set_smu_mailbox_registers(smu);
}

View file

@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)
if (!found)
return false;
pci_dev_put(pdev);
rdev->bios = kmalloc(size, GFP_KERNEL);
if (!rdev->bios) {