Merge tag 'amd-drm-next-6.2-2022-12-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.2-2022-12-02:

amdgpu:
- Fix CPU stalls when allocating large amounts of system memory
- SR-IOV fixes
- BACO fixes
- Enable GC 11.0.4
- Enable PSP 13.0.11
- Enable SMU 13.0.11
- Enable NBIO 7.7.1
- Fix reported VCN capabilities for RDNA2
- Misc cleanups
- PCI ref count fixes
- DCN DPIA fixes
- DCN 3.2.x fixes
- Documentation updates
- GC 11.x fixes
- VCN RAS fixes
- APU fix for passthrough
- PSR fixes
- GFX preemption support for gfx9
- SDMA fix for S0ix

amdkfd:
- Enable KFD support for GC 11.0.4
- Misc cleanups
- Fix memory leak

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221202160659.5987-1-alexander.deucher@amd.com
This commit is contained in: commit 9e5737bd04
@@ -30,12 +30,35 @@ we have a dedicated glossary for Display Core at
    EOP
      End Of Pipe/Pipeline

    GART
      Graphics Address Remapping Table. This is the name we use for the GPUVM
      page table used by the GPU kernel driver. It remaps system resources
      (memory or MMIO space) into the GPU's address space so the GPU can access
      them. The name GART harkens back to the days of AGP when the platform
      provided an MMU that the GPU could use to get a contiguous view of
      scattered pages for DMA. The MMU has since moved on to the GPU, but the
      name stuck.

    GC
      Graphics and Compute

    GMC
      Graphic Memory Controller

    GPUVM
      GPU Virtual Memory. This is the GPU's MMU. The GPU supports multiple
      virtual address spaces that can be in flight at any given time. These
      allow the GPU to remap VRAM and system resources into GPU virtual address
      spaces for use by the GPU kernel driver and applications using the GPU.
      These provide memory protection for different applications using the GPU.

    GTT
      Graphics Translation Tables. This is a memory pool managed through TTM
      which provides access to system resources (memory or MMIO space) for
      use by the GPU. These addresses can be mapped into the "GART" GPUVM page
      table for use by the kernel driver or into per process GPUVM page tables
      for application usage.

    IH
      Interrupt Handler
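The GART and GTT entries above describe address remapping in prose. As a rough illustration only, here is a toy, self-contained C sketch (all names invented for the example, not driver code) of how a remap table presents scattered backing pages as one contiguous aperture:

	/* Toy GART-style remap table: aperture page i is backed by entries[i]. */
	#include <stdint.h>
	#include <stddef.h>

	#define TOY_PAGE_SHIFT 12
	#define TOY_PAGE_SIZE  (1u << TOY_PAGE_SHIFT)

	struct toy_gart {
		uint64_t *entries;	/* system address backing each aperture page */
		size_t num_entries;
	};

	/* Translate a contiguous aperture offset to the backing system address. */
	static uint64_t toy_gart_translate(const struct toy_gart *gart, uint64_t offset)
	{
		size_t idx = offset >> TOY_PAGE_SHIFT;

		if (idx >= gart->num_entries)
			return 0;	/* out of range: a fault on real hardware */
		return gart->entries[idx] + (offset & (TOY_PAGE_SIZE - 1));
	}

The real GART/GTT page tables live in the GPU's MMU and are filled in by TTM and the GPUVM code, but the lookup idea is the same: index by page, add the in-page offset.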
@@ -148,10 +148,10 @@ PRIME Buffer Sharing
MMU Notifier
============

-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
   :doc: MMU Notifier

-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
   :internal:

AMDGPU Virtual Memory
@@ -3,7 +3,7 @@
==========================

The drm/amdgpu driver supports all AMD Radeon GPUs based on the Graphics Core
-Next (GCN) architecture.
+Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.

.. toctree::
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+	amdgpu_ring_mux.o

amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -2204,7 +2204,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,

	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
	if (ret) {
-		kfree(mem);
+		kfree(*mem);
		return ret;
	}
@@ -104,7 +104,7 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
	struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
{
-	uint32_t start_addr, fw_size, drv_size;
+	u32 start_addr, fw_size, drv_size;

	start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
	fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);

@@ -116,7 +116,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
		  drv_size);

	if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
-	    (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+	    (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
	    ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
		/* Firmware request VRAM reservation for SR-IOV */
		adev->mman.fw_vram_usage_start_offset = (start_addr &

@@ -133,7 +133,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
	struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
{
-	uint32_t fw_start_addr, fw_size, drv_start_addr, drv_size;
+	u32 fw_start_addr, fw_size, drv_start_addr, drv_size;

	fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
	fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);

@@ -147,14 +147,18 @@ static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
		  drv_start_addr,
		  drv_size);

-	if ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
+	if (amdgpu_sriov_vf(adev) &&
+	    ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+	    ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
		/* Firmware request VRAM reservation for SR-IOV */
		adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
		adev->mman.fw_vram_usage_size = fw_size << 10;
	}

-	if ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
+	if (amdgpu_sriov_vf(adev) &&
+	    ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+	    ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
		/* driver request VRAM reservation for SR-IOV */
		adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;

@@ -172,8 +176,8 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
					vram_usagebyfirmware);
	struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
	struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
-	uint16_t data_offset;
-	uint8_t frev, crev;
+	u16 data_offset;
+	u8 frev, crev;
	int usage_bytes = 0;

	if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
@@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)

	if (!found)
		return false;
	pci_dev_put(pdev);

	adev->bios = kmalloc(size, GFP_KERNEL);
	if (!adev->bios) {
@@ -2473,6 +2473,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
	if (!amdgpu_sriov_vf(adev)) {
		struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

		if (WARN_ON(!hive)) {
			r = -ENOENT;
			goto init_failed;
		}

		if (!hive->reset_domain ||
		    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
			r = -ENOENT;
@@ -1512,6 +1512,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
	case IP_VERSION(11, 0, 4):
		amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
		break;
	default:

@@ -1556,6 +1557,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
	case IP_VERSION(11, 0, 4):
		amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
		break;
	default:

@@ -1641,6 +1643,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
	case IP_VERSION(13, 0, 7):
	case IP_VERSION(13, 0, 8):
	case IP_VERSION(13, 0, 10):
	case IP_VERSION(13, 0, 11):
		amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
		break;
	case IP_VERSION(13, 0, 4):

@@ -1691,6 +1694,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
	case IP_VERSION(13, 0, 7):
	case IP_VERSION(13, 0, 8):
	case IP_VERSION(13, 0, 10):
	case IP_VERSION(13, 0, 11):
		amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
		break;
	default:

@@ -1804,6 +1808,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
	case IP_VERSION(11, 0, 4):
		amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
		break;
	default:

@@ -1967,6 +1972,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
	case IP_VERSION(11, 0, 4):
		amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
		adev->enable_mes = true;
		adev->enable_mes_kiq = true;

@@ -2197,6 +2203,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->family = AMDGPU_FAMILY_GC_11_0_0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		adev->family = AMDGPU_FAMILY_GC_11_0_1;
		break;
	default:

@@ -2214,6 +2221,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
	case IP_VERSION(10, 3, 6):
	case IP_VERSION(10, 3, 7):
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		adev->flags |= AMD_IS_APU;
		break;
	default:

@@ -2270,6 +2278,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
		break;
	case IP_VERSION(7, 7, 0):
	case IP_VERSION(7, 7, 1):
		adev->nbio.funcs = &nbio_v7_7_funcs;
		adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
		break;
@@ -231,16 +231,18 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);

/**
 * DOC: gartsize (uint)
- * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic).
+ * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing.
+ * The default is -1 (The size depends on asic).
 */
-MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
+MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);

/**
 * DOC: gttsize (int)
- * Restrict the size of GTT domain in MiB for testing. The default is -1 (Use 1/2 RAM, minimum value is 3GB).
+ * Restrict the size of GTT domain (for userspace use) in MiB for testing.
+ * The default is -1 (Use 1/2 RAM, minimum value is 3GB).
 */
-MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
+MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);

/**

@@ -2569,6 +2571,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
		amdgpu_device_baco_enter(drm_dev);
	}

	dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");

	return 0;
}
@@ -55,6 +55,7 @@ struct amdgpu_fence {

	/* RB, DMA, etc. */
	struct amdgpu_ring		*ring;
	ktime_t				start_timestamp;
};

static struct kmem_cache *amdgpu_fence_slab;

@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
		}
	}

	to_amdgpu_fence(fence)->start_timestamp = ktime_get();

	/* This function can't be called concurrently anyway, otherwise
	 * emitting the fence would mess up the hardware ring buffer.
	 */

@@ -406,6 +409,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
	return lower_32_bits(emitted);
}

/**
 * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
 * @ring: ring the fence is associated with
 *
 * Find the earliest fence unsignaled until now, calculate the time delta
 * between the time fence emitted and now.
 */
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct dma_fence *fence;
	uint32_t last_seq, sync_seq;

	last_seq = atomic_read(&ring->fence_drv.last_seq);
	sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
	if (last_seq == sync_seq)
		return 0;

	++last_seq;
	last_seq &= drv->num_fences_mask;
	fence = drv->fences[last_seq];
	if (!fence)
		return 0;

	return ktime_us_delta(ktime_get(),
		to_amdgpu_fence(fence)->start_timestamp);
}

/**
 * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
 * @ring: ring the fence is associated with
 * @seq: the fence seq number to update.
 * @timestamp: the start timestamp to update.
 *
 * The function called at the time the fence and related ib is about to
 * resubmit to gpu in MCBP scenario. Thus we do not consider race condition
 * with amdgpu_fence_process to modify the same fence.
 */
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct dma_fence *fence;

	seq &= drv->num_fences_mask;
	fence = drv->fences[seq];
	if (!fence)
		return;

	to_amdgpu_fence(fence)->start_timestamp = timestamp;
}

/**
 * amdgpu_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
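The new start_timestamp bookkeeping above boils down to indexing the fence slot array by sequence number and measuring how long the oldest unsignaled fence has been outstanding. A standalone C sketch of that calculation, with toy types and assuming the slot count is a power of two like the driver's num_fences_mask:

	#include <stdint.h>
	#include <time.h>

	struct toy_fence { int64_t start_ns; };	/* stand-in for start_timestamp */

	static int64_t now_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
	}

	/* last_seq: newest signaled seqno; sync_seq: newest emitted seqno. */
	static int64_t oldest_unsignaled_age_us(const struct toy_fence *fences,
						uint32_t num_fences_mask,
						uint32_t last_seq, uint32_t sync_seq)
	{
		uint32_t idx;

		if (last_seq == sync_seq)		/* nothing outstanding */
			return 0;
		idx = (last_seq + 1) & num_fences_mask;	/* oldest unsignaled slot */
		return (now_ns() - fences[idx].start_ns) / 1000;
	}

This is the value the gfx9 software-ring code compares against AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US later in this series to decide whether a low-priority submission has been stuck long enough to preempt.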
@@ -33,6 +33,7 @@
#include "amdgpu_imu.h"
#include "soc15.h"
#include "amdgpu_ras.h"
#include "amdgpu_ring_mux.h"

/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE			0x00000000L

@@ -352,6 +353,9 @@ struct amdgpu_gfx {
	struct amdgpu_gfx_ras		*ras;

	bool				is_poweron;

	struct amdgpu_ring		sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
	struct amdgpu_ring_mux          muxer;
};

#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
@@ -51,6 +51,8 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_hmm.h"

#define MAX_WALK_BYTE	(2UL << 30)

/**
 * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
 *

@@ -163,6 +165,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
			       struct hmm_range **phmm_range)
{
	struct hmm_range *hmm_range;
	unsigned long end;
	unsigned long timeout;
	unsigned long i;
	unsigned long *pfns;

@@ -184,25 +187,42 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
	hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
	hmm_range->hmm_pfns = pfns;
	hmm_range->start = start;
-	hmm_range->end = start + npages * PAGE_SIZE;
+	end = start + npages * PAGE_SIZE;
	hmm_range->dev_private_owner = owner;

-	/* Assuming 512MB takes maxmium 1 second to fault page address */
-	timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
-	timeout = jiffies + msecs_to_jiffies(timeout);
+	do {
+		hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+
+		pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
+			hmm_range->start, hmm_range->end);
+
+		/* Assuming 512MB takes maxmium 1 second to fault page address */
+		timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL);
+		timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
+		timeout = jiffies + msecs_to_jiffies(timeout);

retry:
-	hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
-	r = hmm_range_fault(hmm_range);
-	if (unlikely(r)) {
-		/*
-		 * FIXME: This timeout should encompass the retry from
-		 * mmu_interval_read_retry() as well.
-		 */
-		if (r == -EBUSY && !time_after(jiffies, timeout))
-			goto retry;
-		goto out_free_pfns;
-	}
+		hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+		r = hmm_range_fault(hmm_range);
+		if (unlikely(r)) {
+			/*
+			 * FIXME: This timeout should encompass the retry from
+			 * mmu_interval_read_retry() as well.
+			 */
+			if (r == -EBUSY && !time_after(jiffies, timeout))
+				goto retry;
+			goto out_free_pfns;
+		}
+
+		if (hmm_range->end == end)
+			break;
+		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+		hmm_range->start = hmm_range->end;
+		schedule();
+	} while (hmm_range->end < end);
+
+	hmm_range->start = start;
+	hmm_range->hmm_pfns = pfns;

	/*
	 * Due to default_flags, all pages are HMM_PFN_VALID or
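The reworked loop above walks a large user range in MAX_WALK_BYTE pieces so that each piece gets its own bounded timeout and the thread can reschedule between pieces; this is the "fix CPU stalls when allocating large amounts of system memory" item from the commit message. A generic sketch of the same pattern, where process_chunk() is a hypothetical stand-in for the per-chunk hmm_range_fault() call, not a kernel API:

	#include <stdint.h>

	#define CHUNK_BYTES (2ULL << 30)	/* mirrors MAX_WALK_BYTE */

	static int walk_range(uint64_t start, uint64_t end,
			      int (*process_chunk)(uint64_t s, uint64_t e))
	{
		uint64_t cur = start;

		while (cur < end) {
			uint64_t stop = (cur + CHUNK_BYTES < end) ? cur + CHUNK_BYTES : end;
			/* each chunk owns its retry/timeout handling */
			int r = process_chunk(cur, stop);

			if (r)
				return r;
			cur = stop;	/* advance; the driver also yields here */
		}
		return 0;
	}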
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		}
	}

	amdgpu_ring_ib_begin(ring);
	if (job && ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
		ring->funcs->emit_wave_limit(ring, false);

	amdgpu_ring_ib_end(ring);
	amdgpu_ring_commit(ring);
	return 0;
}
@@ -431,7 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
	case AMDGPU_HW_IP_VCN_DEC:
		type = AMD_IP_BLOCK_TYPE_VCN;
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-			if (adev->uvd.harvest_config & (1 << i))
+			if (adev->vcn.harvest_config & (1 << i))
				continue;

			if (adev->vcn.inst[i].ring_dec.sched.ready)

@@ -443,7 +443,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
	case AMDGPU_HW_IP_VCN_ENC:
		type = AMD_IP_BLOCK_TYPE_VCN;
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-			if (adev->uvd.harvest_config & (1 << i))
+			if (adev->vcn.harvest_config & (1 << i))
				continue;

			for (j = 0; j < adev->vcn.num_enc_rings; j++)
@@ -165,6 +165,7 @@ static int psp_early_init(void *handle)
	case IP_VERSION(13, 0, 5):
	case IP_VERSION(13, 0, 8):
	case IP_VERSION(13, 0, 10):
	case IP_VERSION(13, 0, 11):
		psp_v13_0_set_psp_funcs(psp);
		psp->autoload_supported = true;
		break;

@@ -512,26 +513,22 @@ static int psp_sw_fini(void *handle)
	struct psp_gfx_cmd_resp *cmd = psp->cmd;

	psp_memory_training_fini(psp);
-	if (psp->sos_fw) {
-		release_firmware(psp->sos_fw);
-		psp->sos_fw = NULL;
-	}
-	if (psp->asd_fw) {
-		release_firmware(psp->asd_fw);
-		psp->asd_fw = NULL;
-	}
-	if (psp->ta_fw) {
-		release_firmware(psp->ta_fw);
-		psp->ta_fw = NULL;
-	}
-	if (psp->cap_fw) {
-		release_firmware(psp->cap_fw);
-		psp->cap_fw = NULL;
-	}
-	if (psp->toc_fw) {
-		release_firmware(psp->toc_fw);
-		psp->toc_fw = NULL;
-	}
+
+	release_firmware(psp->sos_fw);
+	psp->sos_fw = NULL;
+
+	release_firmware(psp->asd_fw);
+	psp->asd_fw = NULL;
+
+	release_firmware(psp->ta_fw);
+	psp->ta_fw = NULL;
+
+	release_firmware(psp->cap_fw);
+	psp->cap_fw = NULL;
+
+	release_firmware(psp->toc_fw);
+	psp->toc_fw = NULL;

	if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
	    adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7))
		psp_sysfs_fini(adev);

@@ -861,7 +858,7 @@ static int psp_tmr_unload(struct psp_context *psp)
	struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);

	psp_prep_tmr_unload_cmd_buf(psp, cmd);
-	dev_info(psp->adev->dev, "free PSP TMR buffer\n");
+	dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");

	ret = psp_cmd_submit_buf(psp, NULL, cmd,
				 psp->fence_buf_mc_addr);
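The psp_sw_fini() cleanup above drops the per-pointer checks because release_firmware() in the kernel already treats a NULL pointer as a no-op. A minimal sketch of that NULL-tolerant release pattern with toy types, not the kernel API:

	struct fw_blob { int dummy; };

	static void toy_release(struct fw_blob *fw)
	{
		if (!fw)
			return;		/* NULL is a no-op, so callers need no check */
		/* ... free the blob's resources here ... */
	}

	static void toy_cleanup(struct fw_blob **fws, int n)
	{
		for (int i = 0; i < n; i++) {
			toy_release(fws[i]);	/* unconditional, even if never loaded */
			fws[i] = 0;
		}
	}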
@@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
	struct amdgpu_ras *con =
		container_of(attr, struct amdgpu_ras, features_attr);

-	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
+	return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}

static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
@@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)

	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
}

void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_begin(ring);
}

void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_end(ring);
}
@@ -39,6 +39,7 @@ struct amdgpu_vm;
#define AMDGPU_MAX_RINGS		28
#define AMDGPU_MAX_HWIP_RINGS		8
#define AMDGPU_MAX_GFX_RINGS		2
#define AMDGPU_MAX_SW_GFX_RINGS		2
#define AMDGPU_MAX_COMPUTE_RINGS	8
#define AMDGPU_MAX_VCE_RINGS		3
#define AMDGPU_MAX_UVD_ENC_RINGS	2

@@ -59,6 +60,7 @@ enum amdgpu_ring_priority_level {
#define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
#define AMDGPU_FENCE_FLAG_INT           (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
#define AMDGPU_FENCE_FLAG_EXEC          (1 << 3)

#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)

@@ -143,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
				      uint32_t wait_seq,
				      signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);

void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);

u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
					 ktime_t timestamp);

/*
 * Rings.
 */

@@ -279,6 +286,10 @@ struct amdgpu_ring {
	bool			is_mes_queue;
	uint32_t		hw_queue_id;
	struct amdgpu_mes_ctx_data *mes_ctx;

	bool			is_sw_ring;
	unsigned int		entry_index;

};

#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))

@@ -307,6 +318,9 @@ struct amdgpu_ring {
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)

int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_ring_ib_end(struct amdgpu_ring *ring);

void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
@@ -0,0 +1,516 @@
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/slab.h>
#include <drm/drm_print.h>

#include "amdgpu_ring_mux.h"
#include "amdgpu_ring.h"
#include "amdgpu.h"

#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000

static const struct ring_info {
	unsigned int hw_pio;
	const char *ring_name;
} sw_ring_info[] = {
	{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
	{ AMDGPU_RING_PRIO_2, "gfx_high"},
};

static struct kmem_cache *amdgpu_mux_chunk_slab;

static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
								struct amdgpu_ring *ring)
{
	return ring->entry_index < mux->ring_entry_size ?
			&mux->ring_entry[ring->entry_index] : NULL;
}

/* copy packages on sw ring range[begin, end) */
static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
						  struct amdgpu_ring *ring,
						  u64 s_start, u64 s_end)
{
	u64 start, end;
	struct amdgpu_ring *real_ring = mux->real_ring;

	start = s_start & ring->buf_mask;
	end = s_end & ring->buf_mask;

	if (start == end) {
		DRM_ERROR("no more data copied from sw ring\n");
		return;
	}
	if (start > end) {
		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
					   (ring->ring_size >> 2) - start);
		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
	} else {
		amdgpu_ring_alloc(real_ring, end - start);
		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
	}
}

static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_mux_entry *e = NULL;
	struct amdgpu_mux_chunk *chunk;
	uint32_t seq, last_seq;
	int i;

	/*find low priority entries:*/
	if (!mux->s_resubmit)
		return;

	for (i = 0; i < mux->num_ring_entries; i++) {
		if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
			e = &mux->ring_entry[i];
			break;
		}
	}

	if (!e) {
		DRM_ERROR("%s no low priority ring found\n", __func__);
		return;
	}

	last_seq = atomic_read(&e->ring->fence_drv.last_seq);
	seq = mux->seqno_to_resubmit;
	if (last_seq < seq) {
		/*resubmit all the fences between (last_seq, seq]*/
		list_for_each_entry(chunk, &e->list, entry) {
			if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
				amdgpu_fence_update_start_timestamp(e->ring,
								    chunk->sync_seq,
								    ktime_get());
				amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
								      chunk->start,
								      chunk->end);
				mux->wptr_resubmit = chunk->end;
				amdgpu_ring_commit(mux->real_ring);
			}
		}
	}

	del_timer(&mux->resubmit_timer);
	mux->s_resubmit = false;
}

static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
{
	mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
}

static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
{
	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);

	if (!spin_trylock(&mux->lock)) {
		amdgpu_ring_mux_schedule_resubmit(mux);
		DRM_ERROR("reschedule resubmit\n");
		return;
	}
	amdgpu_mux_resubmit_chunks(mux);
	spin_unlock(&mux->lock);
}

int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
			 unsigned int entry_size)
{
	mux->real_ring = ring;
	mux->num_ring_entries = 0;

	mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
	if (!mux->ring_entry)
		return -ENOMEM;

	mux->ring_entry_size = entry_size;
	mux->s_resubmit = false;

	amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk",
						  sizeof(struct amdgpu_mux_chunk), 0,
						  SLAB_HWCACHE_ALIGN, NULL);
	if (!amdgpu_mux_chunk_slab) {
		DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
		return -ENOMEM;
	}

	spin_lock_init(&mux->lock);
	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);

	return 0;
}

void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk, *chunk2;
	int i;

	for (i = 0; i < mux->num_ring_entries; i++) {
		e = &mux->ring_entry[i];
		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
			list_del(&chunk->entry);
			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
		}
	}
	kmem_cache_destroy(amdgpu_mux_chunk_slab);
	kfree(mux->ring_entry);
	mux->ring_entry = NULL;
	mux->num_ring_entries = 0;
	mux->ring_entry_size = 0;
}

int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;

	if (mux->num_ring_entries >= mux->ring_entry_size) {
		DRM_ERROR("add sw ring exceeding max entry size\n");
		return -ENOENT;
	}

	e = &mux->ring_entry[mux->num_ring_entries];
	ring->entry_index = mux->num_ring_entries;
	e->ring = ring;

	INIT_LIST_HEAD(&e->list);
	mux->num_ring_entries += 1;
	return 0;
}

void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
{
	struct amdgpu_mux_entry *e;

	spin_lock(&mux->lock);

	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
		amdgpu_mux_resubmit_chunks(mux);

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry for sw ring\n");
		spin_unlock(&mux->lock);
		return;
	}

	/* We could skip this set wptr as preemption in process. */
	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
		spin_unlock(&mux->lock);
		return;
	}

	e->sw_cptr = e->sw_wptr;
	/* Update cptr if the package already copied in resubmit functions */
	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
		e->sw_cptr = mux->wptr_resubmit;
	e->sw_wptr = wptr;
	e->start_ptr_in_hw_ring = mux->real_ring->wptr;

	/* Skip copying for the packages already resubmitted.*/
	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
		amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
		amdgpu_ring_commit(mux->real_ring);
	} else {
		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
	}
	spin_unlock(&mux->lock);
}

u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry for sw ring\n");
		return 0;
	}

	return e->sw_wptr;
}

/**
 * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
 * @mux: the multiplexer the software rings attach to
 * @ring: the software ring of which we calculate the readptr
 *
 * The return value of the readptr is not precise while the other rings could
 * write data onto the real ring buffer.After overwriting on the real ring, we
 * can not decide if our packages have been excuted or not read yet. However,
 * this function is only called by the tools such as umr to collect the latest
 * packages for the hang analysis. We assume the hang happens near our latest
 * submit. Thus we could use the following logic to give the clue:
 * If the readptr is between start and end, then we return the copy pointer
 * plus the distance from start to readptr. If the readptr is before start, we
 * return the copy pointer. Lastly, if the readptr is past end, we return the
 * write pointer.
 */
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;
	u64 readp, offset, start, end;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("no sw entry found!\n");
		return 0;
	}

	readp = amdgpu_ring_get_rptr(mux->real_ring);

	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
	if (start > end) {
		if (readp <= end)
			readp += mux->real_ring->ring_size >> 2;
		end += mux->real_ring->ring_size >> 2;
	}

	if (start <= readp && readp <= end) {
		offset = readp - start;
		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
	} else if (readp < start) {
		e->sw_rptr = e->sw_cptr;
	} else {
		/* end < readptr */
		e->sw_rptr = e->sw_wptr;
	}

	return e->sw_rptr;
}

u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	return amdgpu_ring_mux_get_rptr(mux, ring);
}

u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	return amdgpu_ring_mux_get_wptr(mux, ring);
}

void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
}

/* Override insert_nop to prevent emitting nops to the software rings */
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	WARN_ON(!ring->is_sw_ring);
}

const char *amdgpu_sw_ring_name(int idx)
{
	return idx < ARRAY_SIZE(sw_ring_info) ?
		sw_ring_info[idx].ring_name : NULL;
}

unsigned int amdgpu_sw_ring_priority(int idx)
{
	return idx < ARRAY_SIZE(sw_ring_info) ?
		sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
}

/*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/
int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_ring *ring;
	int i, need_preempt;

	need_preempt = 0;
	for (i = 0; i < mux->num_ring_entries; i++) {
		ring = mux->ring_entry[i].ring;
		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
		    amdgpu_fence_count_emitted(ring) > 0)
			return 0;
		if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
		    amdgpu_fence_last_unsignaled_time_us(ring) >
		    AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
			need_preempt = 1;
	}
	return need_preempt && !mux->s_resubmit;
}

/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */
int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
{
	int r;

	spin_lock(&mux->lock);
	mux->pending_trailing_fence_signaled = true;
	r = amdgpu_ring_preempt_ib(mux->real_ring);
	spin_unlock(&mux->lock);
	return r;
}

void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
		if (amdgpu_mcbp_scan(mux) > 0)
			amdgpu_mcbp_trigger_preempt(mux);
		return;
	}

	amdgpu_ring_mux_start_ib(mux, ring);
}

void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
		return;
	amdgpu_ring_mux_end_ib(mux, ring);
}

void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk;

	spin_lock(&mux->lock);
	amdgpu_mux_resubmit_chunks(mux);
	spin_unlock(&mux->lock);

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry!\n");
		return;
	}

	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
	if (!chunk) {
		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
		return;
	}

	chunk->start = ring->wptr;
	list_add_tail(&chunk->entry, &e->list);
}

static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	uint32_t last_seq, size = 0;
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk, *tmp;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry!\n");
		return;
	}

	last_seq = atomic_read(&ring->fence_drv.last_seq);

	list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
		if (chunk->sync_seq <= last_seq) {
			list_del(&chunk->entry);
			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
		} else {
			size++;
		}
	}
}

void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry!\n");
		return;
	}

	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
	if (!chunk) {
		DRM_ERROR("cannot find chunk!\n");
		return;
	}

	chunk->end = ring->wptr;
	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);

	scan_and_remove_signaled_chunk(mux, ring);
}

bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_ring *ring = NULL;
	int i;

	if (!mux->pending_trailing_fence_signaled)
		return false;

	if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
		return false;

	for (i = 0; i < mux->num_ring_entries; i++) {
		e = &mux->ring_entry[i];
		if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
			ring = e->ring;
			break;
		}
	}

	if (!ring) {
		DRM_ERROR("cannot find low priority ring\n");
		return false;
	}

	amdgpu_fence_process(ring);
	if (amdgpu_fence_count_emitted(ring) > 0) {
		mux->s_resubmit = true;
		mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
		amdgpu_ring_mux_schedule_resubmit(mux);
	}

	mux->pending_trailing_fence_signaled = false;
	return true;
}
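The core of amdgpu_ring_mux_copy_pkt_from_sw_ring() above is a masked ring-buffer copy that splits into two pieces when the source range wraps past the end of the buffer. A standalone C sketch of just that wrap-around logic, with toy buffers and assuming a power-of-two ring size (the driver uses amdgpu_ring_write_multiple() instead of memcpy):

	#include <stdint.h>
	#include <string.h>

	static void ring_copy(uint32_t *dst, const uint32_t *src_ring, uint32_t ring_dw,
			      uint64_t s_start, uint64_t s_end)
	{
		uint64_t mask = ring_dw - 1;	/* ring_dw must be a power of two */
		uint64_t start = s_start & mask;
		uint64_t end = s_end & mask;

		if (start == end)
			return;			/* nothing to copy */
		if (start > end) {		/* range wraps past the buffer end */
			memcpy(dst, &src_ring[start], (ring_dw - start) * sizeof(*src_ring));
			memcpy(dst + (ring_dw - start), &src_ring[0], end * sizeof(*src_ring));
		} else {
			memcpy(dst, &src_ring[start], (end - start) * sizeof(*src_ring));
		}
	}

The same wrap handling appears again in amdgpu_ring_mux_get_rptr(), where the hardware read pointer is unwrapped before being mapped back onto the software ring.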
@@ -0,0 +1,103 @@
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __AMDGPU_RING_MUX__
#define __AMDGPU_RING_MUX__

#include <linux/timer.h>
#include <linux/spinlock.h>
#include "amdgpu_ring.h"

struct amdgpu_ring;

/**
 * struct amdgpu_mux_entry - the entry recording software rings copying information.
 * @ring: the pointer to the software ring.
 * @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
 * @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
 * @sw_cptr: the position of the copy pointer in the sw ring.
 * @sw_rptr: the read pointer in software ring.
 * @sw_wptr: the write pointer in software ring.
 * @list: list head for amdgpu_mux_chunk
 */
struct amdgpu_mux_entry {
	struct amdgpu_ring	*ring;
	u64			start_ptr_in_hw_ring;
	u64			end_ptr_in_hw_ring;
	u64			sw_cptr;
	u64			sw_rptr;
	u64			sw_wptr;
	struct list_head	list;
};

struct amdgpu_ring_mux {
	struct amdgpu_ring	*real_ring;

	struct amdgpu_mux_entry *ring_entry;
	unsigned int		num_ring_entries;
	unsigned int		ring_entry_size;
	/*the lock for copy data from different software rings*/
	spinlock_t		lock;
	bool			s_resubmit;
	uint32_t		seqno_to_resubmit;
	u64			wptr_resubmit;
	struct timer_list	resubmit_timer;

	bool			pending_trailing_fence_signaled;
};

/**
 * struct amdgpu_mux_chunk - save the location of indirect buffer's package on softare rings.
 * @entry: the list entry.
 * @sync_seq: the fence seqno related with the saved IB.
 * @start:- start location on the software ring.
 * @end:- end location on the software ring.
 */
struct amdgpu_mux_chunk {
	struct list_head	entry;
	uint32_t		sync_seq;
	u64			start;
	u64			end;
};

int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
			 unsigned int entry_size);
void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);

u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
const char *amdgpu_sw_ring_name(int idx);
unsigned int amdgpu_sw_ring_priority(int idx);

#endif
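For orientation, here is a hypothetical use of the API declared above, roughly the shape of what the gfx9 code does when it puts two software rings behind one hardware gfx ring. This is a sketch in driver context (error unwinding and the surrounding ring setup are omitted), not a standalone program, and toy_setup_sw_rings is an invented name:

	static int toy_setup_sw_rings(struct amdgpu_device *adev,
				      struct amdgpu_ring *real_ring,
				      struct amdgpu_ring *sw_rings, int num_sw)
	{
		struct amdgpu_ring_mux *mux = &adev->gfx.muxer;	/* added in amdgpu_gfx.h above */
		int i, r;

		r = amdgpu_ring_mux_init(mux, real_ring, num_sw);
		if (r)
			return r;

		for (i = 0; i < num_sw; i++) {
			sw_rings[i].is_sw_ring = true;
			r = amdgpu_ring_mux_add_sw_ring(mux, &sw_rings[i]);
			if (r)
				return r;
		}
		/* afterwards, amdgpu_sw_ring_set_wptr_gfx() funnels the software
		 * rings' packets through the mux onto the real hardware ring */
		return 0;
	}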
@@ -1545,7 +1545,7 @@ static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
			      NULL,
-			      NULL);
+			      &adev->mman.drv_vram_usage_va);
}

/**

@@ -1583,8 +1583,9 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 */
static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
{
-	uint64_t vram_size = adev->gmc.visible_vram_size;
+	u64 vram_size = adev->gmc.visible_vram_size;

	adev->mman.drv_vram_usage_va = NULL;
	adev->mman.drv_vram_usage_reserved_bo = NULL;

	if (adev->mman.drv_vram_usage_size == 0 ||

@@ -1596,7 +1597,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
					  adev->mman.drv_vram_usage_size,
					  AMDGPU_GEM_DOMAIN_VRAM,
					  &adev->mman.drv_vram_usage_reserved_bo,
-					  NULL);
+					  &adev->mman.drv_vram_usage_va);
}

/*
@@ -90,6 +90,7 @@ struct amdgpu_mman {
	u64		drv_vram_usage_start_offset;
	u64		drv_vram_usage_size;
	struct amdgpu_bo	*drv_vram_usage_reserved_bo;
	void		*drv_vram_usage_va;

	/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
	struct amdgpu_bo	*sdma_access_bo;
@@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
		break;
	case IP_VERSION(3, 0, 2):
		fw_name = FIRMWARE_VANGOGH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 0, 16):
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
|
|||
struct eeprom_table_record bp;
|
||||
uint64_t retired_page;
|
||||
uint32_t bp_idx, bp_cnt;
|
||||
void *vram_usage_va = NULL;
|
||||
|
||||
if (adev->mman.fw_vram_usage_va)
|
||||
vram_usage_va = adev->mman.fw_vram_usage_va;
|
||||
else
|
||||
vram_usage_va = adev->mman.drv_vram_usage_va;
|
||||
|
||||
if (bp_block_size) {
|
||||
bp_cnt = bp_block_size / sizeof(uint64_t);
|
||||
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
|
||||
retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va +
|
||||
retired_page = *(uint64_t *)(vram_usage_va +
|
||||
bp_block_offset + bp_idx * sizeof(uint64_t));
|
||||
bp.retired_page = retired_page;
|
||||
|
||||
|
@ -643,7 +649,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
|
|||
adev->virt.fw_reserve.p_vf2pf = NULL;
|
||||
adev->virt.vf2pf_update_interval_ms = 0;
|
||||
|
||||
if (adev->mman.fw_vram_usage_va != NULL) {
|
||||
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
|
||||
DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
|
||||
} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
|
||||
/* go through this logic in ip_init and reset to init workqueue*/
|
||||
amdgpu_virt_exchange_data(adev);
|
||||
|
||||
|
@ -666,32 +674,40 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
|
|||
uint32_t bp_block_size = 0;
|
||||
struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
|
||||
|
||||
if (adev->mman.fw_vram_usage_va != NULL) {
|
||||
|
||||
adev->virt.fw_reserve.p_pf2vf =
|
||||
(struct amd_sriov_msg_pf2vf_info_header *)
|
||||
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
|
||||
adev->virt.fw_reserve.p_vf2pf =
|
||||
(struct amd_sriov_msg_vf2pf_info_header *)
|
||||
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
|
||||
if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
|
||||
if (adev->mman.fw_vram_usage_va) {
|
||||
adev->virt.fw_reserve.p_pf2vf =
|
||||
(struct amd_sriov_msg_pf2vf_info_header *)
|
||||
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
|
||||
adev->virt.fw_reserve.p_vf2pf =
|
||||
(struct amd_sriov_msg_vf2pf_info_header *)
|
||||
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
|
||||
} else if (adev->mman.drv_vram_usage_va) {
|
||||
adev->virt.fw_reserve.p_pf2vf =
|
||||
(struct amd_sriov_msg_pf2vf_info_header *)
|
||||
(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
|
||||
adev->virt.fw_reserve.p_vf2pf =
|
||||
(struct amd_sriov_msg_vf2pf_info_header *)
|
||||
(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
|
||||
}
|
||||
|
||||
amdgpu_virt_read_pf2vf_data(adev);
|
||||
amdgpu_virt_write_vf2pf_data(adev);
|
||||
|
||||
/* bad page handling for version 2 */
|
||||
if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
|
||||
pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
|
||||
pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
|
||||
|
||||
bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
|
||||
((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
|
||||
bp_block_size = pf2vf_v2->bp_block_size;
|
||||
bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
|
||||
((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
|
||||
bp_block_size = pf2vf_v2->bp_block_size;
|
||||
|
||||
if (bp_block_size && !adev->virt.ras_init_done)
|
||||
amdgpu_virt_init_ras_err_handler_data(adev);
|
||||
if (bp_block_size && !adev->virt.ras_init_done)
|
||||
amdgpu_virt_init_ras_err_handler_data(adev);
|
||||
|
||||
if (adev->virt.ras_init_done)
|
||||
amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
|
||||
}
|
||||
if (adev->virt.ras_init_done)
|
||||
amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -45,22 +45,43 @@
/**
 * DOC: GPUVM
 *
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time. The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time. The GPUVM page tables can contain a mix
+ * VRAM pages and system pages (both memory and MMIO) and system pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID. When executing a command buffer,
- * the kernel tells the ring what VMID to use for that command
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID. When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their pages tables accordingly when they submit their
 * command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family. GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special. It is the GPUVM used for the kernel driver. In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures. There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present). These apertures provide direct access to these memories without
+ * incurring the overhead of a page table. VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space. The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process. If an GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
 */

#define START(node) ((node)->start)

@@ -541,6 +562,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
		return 0;

	amdgpu_ring_ib_begin(ring);
	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

@@ -601,6 +623,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
		amdgpu_ring_emit_switch_buffer(ring);
		amdgpu_ring_emit_switch_buffer(ring);
	}
	amdgpu_ring_ib_end(ring);
	return 0;
}
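The DOC comment above describes multi-level GPUVM page tables in prose. As a rough illustration only, here is a toy program that splits a GPU virtual address into per-level table indices, assuming 4 KiB pages, 9 bits per level and four levels; the real per-ASIC layouts (1-2 or 1-5 levels, block sizes, fragment bits) differ and are configured by the driver:

	#include <stdint.h>
	#include <stdio.h>

	#define PT_PAGE_SHIFT 12	/* 4 KiB pages (assumed for the example) */
	#define PT_BITS_PER_LEVEL 9
	#define PT_LEVELS 4

	/* idx[0] is the root-level index, idx[PT_LEVELS - 1] the leaf index. */
	static void decompose_gpuva(uint64_t va, unsigned int idx[PT_LEVELS])
	{
		uint64_t pfn = va >> PT_PAGE_SHIFT;
		int level;

		for (level = PT_LEVELS - 1; level >= 0; level--) {
			idx[level] = pfn & ((1u << PT_BITS_PER_LEVEL) - 1);
			pfn >>= PT_BITS_PER_LEVEL;
		}
	}

	int main(void)
	{
		unsigned int idx[PT_LEVELS];

		decompose_gpuva(0x123456789000ULL, idx);
		printf("L0=%u L1=%u L2=%u L3=%u\n", idx[0], idx[1], idx[2], idx[3]);
		return 0;
	}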
@@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
	if (ret) {
		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
		kobject_put(&hive->kobj);
-		kfree(hive);
		hive = NULL;
		goto pro_end;
	}

@@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
		dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
		ret = -ENOMEM;
		kobject_put(&hive->kobj);
-		kfree(hive);
		hive = NULL;
		goto pro_end;
	}
@@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{

@@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));

@@ -855,6 +860,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;

@@ -1284,6 +1290,7 @@ static int gfx_v11_0_sw_init(void *handle)
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
	case IP_VERSION(11, 0, 4):
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;

@@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
	for (i = 0; i < adev->usec_timeout; i++) {
		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);

-		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
+		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
+		    adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
			bootload_status = RREG32_SOC15(GC, 0,
					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
		else
@ -47,6 +47,7 @@
|
|||
|
||||
#include "amdgpu_ras.h"
|
||||
|
||||
#include "amdgpu_ring_mux.h"
|
||||
#include "gfx_v9_4.h"
|
||||
#include "gfx_v9_0.h"
|
||||
#include "gfx_v9_4_2.h"
|
||||
|
@ -56,6 +57,7 @@
|
|||
#include "asic_reg/gc/gc_9_0_default.h"
|
||||
|
||||
#define GFX9_NUM_GFX_RINGS 1
|
||||
#define GFX9_NUM_SW_GFX_RINGS 2
|
||||
#define GFX9_MEC_HPD_SIZE 4096
|
||||
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
|
||||
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
|
||||
|
@ -753,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
|
|||
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
|
||||
struct amdgpu_cu_info *cu_info);
|
||||
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
|
||||
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
|
||||
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
|
||||
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
|
||||
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
|
||||
void *ras_error_status);
|
||||
|
@ -826,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
|
|||
PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
|
||||
|
||||
if (action == PREEMPT_QUEUES_NO_UNMAP) {
|
||||
amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
|
||||
amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
|
||||
amdgpu_ring_write(kiq_ring, seq);
|
||||
amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
|
||||
} else {
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
|
@ -2103,6 +2106,7 @@ static int gfx_v9_0_sw_init(void *handle)
|
|||
struct amdgpu_ring *ring;
|
||||
struct amdgpu_kiq *kiq;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
unsigned int hw_prio;
|
||||
|
||||
switch (adev->ip_versions[GC_HWIP][0]) {
|
||||
case IP_VERSION(9, 0, 1):
|
||||
|
@ -2186,6 +2190,9 @@ static int gfx_v9_0_sw_init(void *handle)
|
|||
sprintf(ring->name, "gfx_%d", i);
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
|
||||
|
||||
/* disable scheduler on the real ring */
|
||||
ring->no_scheduler = true;
|
||||
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
|
||||
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
|
||||
AMDGPU_RING_PRIO_DEFAULT, NULL);
|
||||
|
@ -2193,6 +2200,41 @@ static int gfx_v9_0_sw_init(void *handle)
|
|||
return r;
|
||||
}
|
||||
|
||||
/* set up the software rings */
|
||||
if (adev->gfx.num_gfx_rings) {
|
||||
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
|
||||
ring = &adev->gfx.sw_gfx_ring[i];
|
||||
ring->ring_obj = NULL;
|
||||
sprintf(ring->name, amdgpu_sw_ring_name(i));
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
|
||||
ring->is_sw_ring = true;
|
||||
hw_prio = amdgpu_sw_ring_priority(i);
|
||||
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
|
||||
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
|
||||
NULL);
|
||||
if (r)
|
||||
return r;
|
||||
ring->wptr = 0;
|
||||
}
|
||||
|
||||
/* init the muxer and add software rings */
|
||||
r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
|
||||
GFX9_NUM_SW_GFX_RINGS);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
|
||||
return r;
|
||||
}
|
||||
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
|
||||
r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
|
||||
&adev->gfx.sw_gfx_ring[i]);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* set up the compute queues - allocate horizontally across pipes */
|
||||
ring_id = 0;
|
||||
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
|
||||
|
@ -2243,6 +2285,12 @@ static int gfx_v9_0_sw_fini(void *handle)
|
|||
int i;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
if (adev->gfx.num_gfx_rings) {
|
||||
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
|
||||
amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
|
||||
amdgpu_ring_mux_fini(&adev->gfx.muxer);
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
|
@ -5157,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
|||
|
||||
control |= ib->length_dw | (vmid << 24);
|
||||
|
||||
if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
|
||||
if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
|
||||
control |= INDIRECT_BUFFER_PRE_ENB(1);
|
||||
|
||||
if (flags & AMDGPU_IB_PREEMPTED)
|
||||
control |= INDIRECT_BUFFER_PRE_RESUME(1);
|
||||
|
||||
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
|
||||
gfx_v9_0_ring_emit_de_meta(ring);
|
||||
gfx_v9_0_ring_emit_de_meta(ring,
|
||||
(!amdgpu_sriov_vf(ring->adev) &&
|
||||
flags & AMDGPU_IB_PREEMPTED) ?
|
||||
true : false);
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, header);
|
||||
|
@ -5216,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
|||
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
|
||||
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
|
||||
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
|
||||
bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
|
||||
uint32_t dw2 = 0;
|
||||
|
||||
/* RELEASE_MEM - flush caches, send int */
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
|
||||
amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
|
||||
EOP_TC_NC_ACTION_EN) :
|
||||
(EOP_TCL1_ACTION_EN |
|
||||
EOP_TC_ACTION_EN |
|
||||
EOP_TC_WB_ACTION_EN |
|
||||
EOP_TC_MD_ACTION_EN)) |
|
||||
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
|
||||
EVENT_INDEX(5)));
|
||||
|
||||
if (writeback) {
|
||||
dw2 = EOP_TC_NC_ACTION_EN;
|
||||
} else {
|
||||
dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
|
||||
EOP_TC_MD_ACTION_EN;
|
||||
}
|
||||
dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
|
||||
EVENT_INDEX(5);
|
||||
if (exec)
|
||||
dw2 |= EOP_EXEC;
|
||||
|
||||
amdgpu_ring_write(ring, dw2);
|
||||
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
|
||||
|
||||
/*
|
||||
|
@ -5331,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
|
|||
amdgpu_ring_write(ring, 0);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
|
||||
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct v9_ce_ib_state ce_payload = {0};
|
||||
uint64_t csa_addr;
|
||||
uint64_t offset, ce_payload_gpu_addr;
|
||||
void *ce_payload_cpu_addr;
|
||||
int cnt;
|
||||
|
||||
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
|
||||
csa_addr = amdgpu_csa_vaddr(ring->adev);
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gfx_meta_data) +
|
||||
offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_gpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
ce_payload_cpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
|
||||
} else {
|
||||
offset = offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
|
||||
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
|
||||
WRITE_DATA_DST_SEL(8) |
|
||||
WR_CONFIRM) |
|
||||
WRITE_DATA_CACHE_POLICY(0));
|
||||
amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
|
||||
amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
|
||||
amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
|
||||
amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
|
||||
amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
|
||||
|
||||
if (resume)
|
||||
amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
|
||||
sizeof(ce_payload) >> 2);
|
||||
else
|
||||
amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
|
||||
sizeof(ce_payload) >> 2);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
|
||||
static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
|
||||
{
|
||||
int i, r = 0;
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_ring *kiq_ring = &kiq->ring;
|
||||
unsigned long flags;
|
||||
|
||||
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock_irqsave(&kiq->ring_lock, flags);
|
||||
|
||||
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
|
||||
spin_unlock_irqrestore(&kiq->ring_lock, flags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* assert preemption condition */
|
||||
amdgpu_ring_set_preempt_cond_exec(ring, false);
|
||||
|
||||
ring->trail_seq += 1;
|
||||
amdgpu_ring_alloc(ring, 13);
|
||||
gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
|
||||
ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
|
||||
/*reset the CP_VMID_PREEMPT after trailing fence*/
|
||||
amdgpu_ring_emit_wreg(ring,
|
||||
SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
|
||||
0x0);
|
||||
|
||||
/* assert IB preemption, emit the trailing fence */
|
||||
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
|
||||
ring->trail_fence_gpu_addr,
|
||||
ring->trail_seq);
|
||||
|
||||
amdgpu_ring_commit(kiq_ring);
|
||||
spin_unlock_irqrestore(&kiq->ring_lock, flags);
|
||||
|
||||
/* poll the trailing fence */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (ring->trail_seq ==
|
||||
le32_to_cpu(*ring->trail_fence_cpu_addr))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout) {
|
||||
r = -EINVAL;
|
||||
DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
|
||||
}
|
||||
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
/* deassert preemption condition */
|
||||
amdgpu_ring_set_preempt_cond_exec(ring, true);
|
||||
return r;
|
||||
}
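The preemption path above follows a common shape: bump a trailing sequence number, ask the CP/KIQ firmware to write it back once the queue has actually been preempted, then busy-poll the write-back location with a bounded timeout. A minimal userspace model of just the bounded-polling step, with invented names standing in for ring->trail_seq and *ring->trail_fence_cpu_addr (not the amdgpu API):

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Illustrative stand-ins for the trailing fence value and expected sequence. */
static volatile uint32_t trail_fence;   /* written back by the "hardware" */
static uint32_t trail_seq;              /* value we told it to write */

static int wait_for_trailing_fence(unsigned int usec_timeout)
{
        unsigned int i;

        for (i = 0; i < usec_timeout; i++) {
                if (trail_fence == trail_seq)
                        return 0;       /* preemption acknowledged */
                usleep(1);              /* kernel code uses udelay(1) here */
        }
        return -1;                      /* timed out; caller warns and recovers */
}

int main(void)
{
        trail_seq = 42;
        trail_fence = 42;               /* pretend the CP already wrote it back */
        printf("wait result: %d\n", wait_for_trailing_fence(100000));
        return 0;
}
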
|
||||
|
||||
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct v9_de_ib_state de_payload = {0};
|
||||
uint64_t csa_addr, gds_addr;
|
||||
uint64_t offset, gds_addr, de_payload_gpu_addr;
|
||||
void *de_payload_cpu_addr;
|
||||
int cnt;
|
||||
|
||||
csa_addr = amdgpu_csa_vaddr(ring->adev);
|
||||
gds_addr = csa_addr + 4096;
|
||||
if (ring->is_mes_queue) {
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gfx_meta_data) +
|
||||
offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
de_payload_cpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
|
||||
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gds_backup) +
|
||||
offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
} else {
|
||||
offset = offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
|
||||
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
|
||||
AMDGPU_CSA_SIZE - adev->gds.gds_size,
|
||||
PAGE_SIZE);
|
||||
}
|
||||
|
||||
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
|
||||
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
|
||||
|
||||
|
@ -5367,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
|
|||
WRITE_DATA_DST_SEL(8) |
|
||||
WR_CONFIRM) |
|
||||
WRITE_DATA_CACHE_POLICY(0));
|
||||
amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
|
||||
amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
|
||||
amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
|
||||
amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
|
||||
amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
|
||||
|
||||
if (resume)
|
||||
amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
|
||||
sizeof(de_payload) >> 2);
|
||||
else
|
||||
amdgpu_ring_write_multiple(ring, (void *)&de_payload,
|
||||
sizeof(de_payload) >> 2);
|
||||
}
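In the non-MES branch above, the GDS backup address is carved out of the top of the CSA and rounded up with ALIGN() to a page boundary. A tiny standalone illustration of that round-up-to-a-power-of-two-boundary arithmetic (the macro below mirrors the usual kernel definition; treat it as a sketch, not the kernel header):

#include <stdio.h>

/* Round x up to the next multiple of a, where a is a power of two. */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long page_size = 4096;

        printf("%lu\n", ALIGN_UP(5000UL, page_size)); /* prints 8192 */
        printf("%lu\n", ALIGN_UP(4096UL, page_size)); /* prints 4096 */
        return 0;
}
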
|
||||
|
||||
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
|
||||
|
@ -5385,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
|
|||
{
|
||||
uint32_t dw2 = 0;
|
||||
|
||||
if (amdgpu_sriov_vf(ring->adev))
|
||||
gfx_v9_0_ring_emit_ce_meta(ring);
|
||||
gfx_v9_0_ring_emit_ce_meta(ring,
|
||||
(!amdgpu_sriov_vf(ring->adev) &&
|
||||
flags & AMDGPU_IB_PREEMPTED) ? true : false);
|
||||
|
||||
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
|
||||
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
|
||||
|
@ -5712,7 +5882,12 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
|
|||
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
|
||||
if (adev->gfx.num_gfx_rings &&
|
||||
!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
|
||||
/* Fence signals are handled on the software rings*/
|
||||
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
|
||||
amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
|
@ -6709,6 +6884,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
|
|||
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
|
||||
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
|
||||
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
|
||||
.preempt_ib = gfx_v9_0_ring_preempt_ib,
|
||||
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
|
||||
.emit_wreg = gfx_v9_0_ring_emit_wreg,
|
||||
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
|
||||
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
|
||||
.soft_recovery = gfx_v9_0_ring_soft_recovery,
|
||||
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
|
||||
};
|
||||
|
||||
static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
|
||||
.type = AMDGPU_RING_TYPE_GFX,
|
||||
.align_mask = 0xff,
|
||||
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
|
||||
.support_64bit_ptrs = true,
|
||||
.secure_submission_supported = true,
|
||||
.vmhub = AMDGPU_GFXHUB_0,
|
||||
.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
|
||||
.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
|
||||
.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
|
||||
.emit_frame_size = /* totally 242 maximum if 16 IBs */
|
||||
5 + /* COND_EXEC */
|
||||
7 + /* PIPELINE_SYNC */
|
||||
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
|
||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
|
||||
2 + /* VM_FLUSH */
|
||||
8 + /* FENCE for VM_FLUSH */
|
||||
20 + /* GDS switch */
|
||||
4 + /* double SWITCH_BUFFER,
|
||||
* the first COND_EXEC jump to the place just
|
||||
* prior to this double SWITCH_BUFFER
|
||||
*/
|
||||
5 + /* COND_EXEC */
|
||||
7 + /* HDP_flush */
|
||||
4 + /* VGT_flush */
|
||||
14 + /* CE_META */
|
||||
31 + /* DE_META */
|
||||
3 + /* CNTX_CTRL */
|
||||
5 + /* HDP_INVL */
|
||||
8 + 8 + /* FENCE x2 */
|
||||
2 + /* SWITCH_BUFFER */
|
||||
7, /* gfx_v9_0_emit_mem_sync */
|
||||
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
|
||||
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
|
||||
.emit_fence = gfx_v9_0_ring_emit_fence,
|
||||
.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
|
||||
.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
|
||||
.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
|
||||
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
|
||||
.test_ring = gfx_v9_0_ring_test_ring,
|
||||
.test_ib = gfx_v9_0_ring_test_ib,
|
||||
.insert_nop = amdgpu_sw_ring_insert_nop,
|
||||
.pad_ib = amdgpu_ring_generic_pad_ib,
|
||||
.emit_switch_buffer = gfx_v9_ring_emit_sb,
|
||||
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
|
||||
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
|
||||
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
|
||||
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
|
||||
.emit_wreg = gfx_v9_0_ring_emit_wreg,
|
||||
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
|
||||
|
@ -6794,6 +7025,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
|
|||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
|
||||
|
||||
if (adev->gfx.num_gfx_rings) {
|
||||
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
|
||||
adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
|
||||
}
|
||||
|
|
|
@ -759,6 +759,7 @@ static int gmc_v11_0_sw_init(void *handle)
|
|||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 2):
|
||||
case IP_VERSION(11, 0, 3):
|
||||
case IP_VERSION(11, 0, 4):
|
||||
adev->num_vmhubs = 2;
|
||||
/*
|
||||
* To fulfill 4-level page support,
|
||||
|
|
|
@ -388,6 +388,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
|
|||
mes_set_hw_res_pkt.disable_reset = 1;
|
||||
mes_set_hw_res_pkt.disable_mes_log = 1;
|
||||
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
|
||||
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
|
||||
mes_set_hw_res_pkt.oversubscription_timer = 50;
|
||||
|
||||
return mes_v11_0_submit_pkt_and_poll_completion(mes,
|
||||
|
|
|
@ -82,10 +82,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode =
|
|||
/* Navi1x */
|
||||
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
|
||||
{
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
|
||||
|
@ -100,10 +100,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =
|
|||
/* Sienna Cichlid */
|
||||
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
|
||||
{
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
|
||||
|
@ -125,10 +125,10 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
|
|||
|
||||
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
|
||||
{
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
|
||||
|
@ -149,7 +149,7 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =
|
|||
|
||||
/* Beige Goby*/
|
||||
static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
|
||||
};
|
||||
|
@ -166,7 +166,7 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {
|
|||
|
||||
/* Yellow Carp*/
|
||||
static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
|
|
|
@ -46,6 +46,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
|
|||
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
|
||||
MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
|
||||
MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
|
||||
MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
|
||||
MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
|
||||
|
||||
/* For large FW files the time to complete can be very long */
|
||||
#define USBC_PD_POLLING_LIMIT_S 240
|
||||
|
@ -102,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
|
|||
case IP_VERSION(13, 0, 3):
|
||||
case IP_VERSION(13, 0, 5):
|
||||
case IP_VERSION(13, 0, 8):
|
||||
case IP_VERSION(13, 0, 11):
|
||||
err = psp_init_toc_microcode(psp, chip_name);
|
||||
if (err)
|
||||
return err;
|
||||
|
|
|
@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
|
|||
|
||||
|
||||
/**
 * sdma_v4_0_gfx_stop - stop the gfx async dma engines
 * sdma_v4_0_gfx_enable - enable the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VEGA10).
 * @enable: enable SDMA RB/IB
 * control the gfx async dma ring buffers (VEGA10).
 */
static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
{
        u32 rb_cntl, ib_cntl;
        int i;

@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)

        for (i = 0; i < adev->sdma.num_instances; i++) {
                rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
                WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
                ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
                WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
        }
}
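sdma_v4_0_gfx_enable() is essentially a read-modify-write of one bitfield per register (RB_ENABLE / IB_ENABLE), which is what REG_SET_FIELD expands to via per-register mask and shift macros. A generic standalone model of that helper, using a made-up field layout rather than the real SDMA register definitions:

#include <stdint.h>
#include <stdio.h>

/* Invented field layout for the example; real drivers generate these
 * mask/shift pairs from the register headers. */
#define RB_ENABLE_MASK  0x00000001u
#define RB_ENABLE_SHIFT 0

static uint32_t reg_set_field(uint32_t reg, uint32_t mask,
                              unsigned int shift, uint32_t val)
{
        return (reg & ~mask) | ((val << shift) & mask);
}

int main(void)
{
        uint32_t rb_cntl = 0xdeadbee0;  /* pretend this came from an MMIO read */
        int enable = 1;

        rb_cntl = reg_set_field(rb_cntl, RB_ENABLE_MASK, RB_ENABLE_SHIFT,
                                enable ? 1 : 0);
        printf("rb_cntl = 0x%08x\n", rb_cntl);  /* 0xdeadbee1 */
        return 0;
}
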
|
||||
|
@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
|
|||
int i;
|
||||
|
||||
if (!enable) {
|
||||
sdma_v4_0_gfx_stop(adev);
|
||||
sdma_v4_0_gfx_enable(adev, enable);
|
||||
sdma_v4_0_rlc_stop(adev);
|
||||
if (adev->sdma.has_page_queue)
|
||||
sdma_v4_0_page_stop(adev);
|
||||
|
@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
/* SMU saves SDMA state for us */
|
||||
if (adev->in_s0ix)
|
||||
if (adev->in_s0ix) {
|
||||
sdma_v4_0_gfx_enable(adev, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return sdma_v4_0_hw_fini(adev);
|
||||
}
|
||||
|
@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
/* SMU restores SDMA state for us */
|
||||
if (adev->in_s0ix)
|
||||
if (adev->in_s0ix) {
|
||||
sdma_v4_0_enable(adev, true);
|
||||
sdma_v4_0_gfx_enable(adev, true);
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return sdma_v4_0_hw_init(adev);
|
||||
}
|
||||
|
|
|
@ -103,10 +103,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
|
|||
/* Vega */
|
||||
static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
|
||||
{
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
};
|
||||
|
@ -120,10 +120,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
|
|||
/* Raven */
|
||||
static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
|
||||
{
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
|
||||
|
@ -138,10 +138,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
|
|||
/* Renoir, Arcturus */
|
||||
static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
|
||||
{
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
|
||||
|
|
|
@ -162,6 +162,7 @@
|
|||
* 2 - Bypass
|
||||
*/
|
||||
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
|
||||
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
|
||||
#define PACKET3_COPY_DATA 0x40
|
||||
#define PACKET3_PFP_SYNC_ME 0x42
|
||||
#define PACKET3_COND_WRITE 0x45
|
||||
|
@ -184,6 +185,7 @@
|
|||
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
|
||||
#define EOP_TC_NC_ACTION_EN (1 << 19)
|
||||
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
|
||||
#define EOP_EXEC (1 << 28) /* For Trailing Fence */
|
||||
|
||||
#define DATA_SEL(x) ((x) << 29)
|
||||
/* 0 - discard
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "soc15.h"
|
||||
#include "soc15_common.h"
|
||||
#include "soc21.h"
|
||||
#include "mxgpu_nv.h"
|
||||
|
||||
static const struct amd_ip_funcs soc21_common_ip_funcs;
|
||||
|
||||
|
@ -61,7 +62,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode =
|
|||
|
||||
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] =
|
||||
{
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
|
||||
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
|
||||
|
@ -325,6 +326,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
|
|||
case IP_VERSION(13, 0, 10):
|
||||
return AMD_RESET_METHOD_MODE1;
|
||||
case IP_VERSION(13, 0, 4):
|
||||
case IP_VERSION(13, 0, 11):
|
||||
return AMD_RESET_METHOD_MODE2;
|
||||
default:
|
||||
if (amdgpu_dpm_is_baco_supported(adev))
|
||||
|
@ -644,24 +646,45 @@ static int soc21_common_early_init(void *handle)
|
|||
AMD_PG_SUPPORT_JPEG;
|
||||
adev->external_rev_id = adev->rev_id + 0x20;
|
||||
break;
|
||||
case IP_VERSION(11, 0, 4):
|
||||
adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
|
||||
AMD_CG_SUPPORT_JPEG_MGCG;
|
||||
adev->pg_flags = AMD_PG_SUPPORT_VCN |
|
||||
AMD_PG_SUPPORT_GFX_PG |
|
||||
AMD_PG_SUPPORT_JPEG;
|
||||
adev->external_rev_id = adev->rev_id + 0x1;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* FIXME: not supported yet */
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
amdgpu_virt_init_setting(adev);
|
||||
xgpu_nv_mailbox_set_irq_funcs(adev);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int soc21_common_late_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
xgpu_nv_mailbox_get_irq(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int soc21_common_sw_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
xgpu_nv_mailbox_add_irq_id(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -699,6 +722,9 @@ static int soc21_common_hw_fini(void *handle)
|
|||
/* disable the doorbell aperture */
|
||||
soc21_enable_doorbell_aperture(adev, false);
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
xgpu_nv_mailbox_put_irq(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -862,6 +862,28 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
|
|||
return;
|
||||
}
|
||||
|
||||
static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
|
||||
bool indirect)
|
||||
{
|
||||
uint32_t tmp;
|
||||
|
||||
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
|
||||
return;
|
||||
|
||||
tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
|
||||
VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
|
||||
VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
|
||||
VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
|
||||
WREG32_SOC15_DPG_MODE(inst_idx,
|
||||
SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
|
||||
tmp, 0, indirect);
|
||||
|
||||
tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
|
||||
WREG32_SOC15_DPG_MODE(inst_idx,
|
||||
SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
|
||||
tmp, 0, indirect);
|
||||
}
|
||||
|
||||
/**
|
||||
* vcn_v4_0_start_dpg_mode - VCN start with dpg mode
|
||||
*
|
||||
|
@ -950,6 +972,8 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
|
|||
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
|
||||
VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
|
||||
|
||||
vcn_v4_0_enable_ras(adev, inst_idx, indirect);
|
||||
|
||||
/* enable master interrupt */
|
||||
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
|
||||
VCN, inst_idx, regUVD_MASTINT_EN),
|
||||
|
|
|
@ -1521,6 +1521,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca
|
|||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 2):
|
||||
case IP_VERSION(11, 0, 3):
|
||||
case IP_VERSION(11, 0, 4):
|
||||
num_of_cache_types =
|
||||
kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
|
||||
break;
|
||||
|
|
|
@ -153,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
|
|||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 2):
|
||||
case IP_VERSION(11, 0, 3):
|
||||
case IP_VERSION(11, 0, 4):
|
||||
kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
|
||||
break;
|
||||
default:
|
||||
|
@ -394,6 +395,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
|
|||
f2g = &gfx_v11_kfd2kgd;
|
||||
break;
|
||||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 4):
|
||||
gfx_target_version = 110003;
|
||||
f2g = &gfx_v11_kfd2kgd;
|
||||
break;
|
||||
|
|
|
@ -2012,10 +2012,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
|||
|
||||
kfd_debug_print_topology();
|
||||
|
||||
if (!res)
|
||||
kfd_notify_gpu_change(gpu_id, 1);
|
||||
kfd_notify_gpu_change(gpu_id, 1);
|
||||
|
||||
return res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -146,14 +146,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
|
|||
/* Number of bytes in PSP footer for firmware. */
|
||||
#define PSP_FOOTER_BYTES 0x100
|
||||
|
||||
/*
|
||||
* DMUB Async to Sync Mechanism Status
|
||||
*/
|
||||
#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
|
||||
#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
|
||||
#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
|
||||
#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4
|
||||
|
||||
/**
|
||||
* DOC: overview
|
||||
*
|
||||
|
@ -1104,7 +1096,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
|
|||
/* Initialize hardware. */
|
||||
memset(&hw_params, 0, sizeof(hw_params));
|
||||
hw_params.fb_base = adev->gmc.fb_start;
|
||||
hw_params.fb_offset = adev->gmc.aper_base;
|
||||
hw_params.fb_offset = adev->vm_manager.vram_base_offset;
|
||||
|
||||
/* backdoor load firmware and trigger dmub running */
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
|
@ -1226,7 +1218,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
|
|||
pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24;
|
||||
|
||||
pa_config->system_aperture.fb_base = adev->gmc.fb_start;
|
||||
pa_config->system_aperture.fb_offset = adev->gmc.aper_base;
|
||||
pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset;
|
||||
pa_config->system_aperture.fb_top = adev->gmc.fb_end;
|
||||
|
||||
pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12;
|
||||
|
@ -1441,6 +1433,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
|
|||
memset(&init_params, 0, sizeof(init_params));
|
||||
#endif
|
||||
|
||||
mutex_init(&adev->dm.dpia_aux_lock);
|
||||
mutex_init(&adev->dm.dc_lock);
|
||||
mutex_init(&adev->dm.audio_lock);
|
||||
|
||||
|
@ -1805,6 +1798,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
|
|||
|
||||
mutex_destroy(&adev->dm.audio_lock);
|
||||
mutex_destroy(&adev->dm.dc_lock);
|
||||
mutex_destroy(&adev->dm.dpia_aux_lock);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -4875,6 +4869,35 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline void fill_dc_dirty_rect(struct drm_plane *plane,
|
||||
struct rect *dirty_rect, int32_t x,
|
||||
int32_t y, int32_t width, int32_t height,
|
||||
int *i, bool ffu)
|
||||
{
|
||||
if (*i > DC_MAX_DIRTY_RECTS)
|
||||
return;
|
||||
|
||||
if (*i == DC_MAX_DIRTY_RECTS)
|
||||
goto out;
|
||||
|
||||
dirty_rect->x = x;
|
||||
dirty_rect->y = y;
|
||||
dirty_rect->width = width;
|
||||
dirty_rect->height = height;
|
||||
|
||||
if (ffu)
|
||||
drm_dbg(plane->dev,
|
||||
"[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
|
||||
plane->base.id, width, height);
|
||||
else
|
||||
drm_dbg(plane->dev,
|
||||
"[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)",
|
||||
plane->base.id, x, y, width, height);
|
||||
|
||||
out:
|
||||
(*i)++;
|
||||
}
|
||||
|
||||
/**
 * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates
 *

@ -4895,10 +4918,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
 * addition, certain use cases - such as cursor and multi-plane overlay (MPO) -
 * implicitly provide damage clips without any client support via the plane
 * bounds.
 *
 * Today, amdgpu_dm only supports the MPO and cursor usecase.
 *
 * TODO: Also enable for FB_DAMAGE_CLIPS
 */
static void fill_dc_dirty_rects(struct drm_plane *plane,
                                struct drm_plane_state *old_plane_state,

@ -4909,12 +4928,11 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
|
|||
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
|
||||
struct rect *dirty_rects = flip_addrs->dirty_rects;
|
||||
uint32_t num_clips;
|
||||
struct drm_mode_rect *clips;
|
||||
bool bb_changed;
|
||||
bool fb_changed;
|
||||
uint32_t i = 0;
|
||||
|
||||
flip_addrs->dirty_rect_count = 0;
|
||||
|
||||
/*
|
||||
* Cursor plane has it's own dirty rect update interface. See
|
||||
* dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data
|
||||
|
@ -4922,20 +4940,20 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
|
|||
if (plane->type == DRM_PLANE_TYPE_CURSOR)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Today, we only consider MPO use-case for PSR SU. If MPO not
|
||||
* requested, and there is a plane update, do FFU.
|
||||
*/
|
||||
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
|
||||
clips = drm_plane_get_damage_clips(new_plane_state);
|
||||
|
||||
if (!dm_crtc_state->mpo_requested) {
|
||||
dirty_rects[0].x = 0;
|
||||
dirty_rects[0].y = 0;
|
||||
dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay;
|
||||
dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay;
|
||||
flip_addrs->dirty_rect_count = 1;
|
||||
DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
|
||||
new_plane_state->plane->base.id,
|
||||
dm_crtc_state->base.mode.crtc_hdisplay,
|
||||
dm_crtc_state->base.mode.crtc_vdisplay);
|
||||
if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
|
||||
goto ffu;
|
||||
|
||||
for (; flip_addrs->dirty_rect_count < num_clips; clips++)
|
||||
fill_dc_dirty_rect(new_plane_state->plane,
|
||||
&dirty_rects[i], clips->x1,
|
||||
clips->y1, clips->x2 - clips->x1,
|
||||
clips->y2 - clips->y1,
|
||||
&flip_addrs->dirty_rect_count,
|
||||
false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -4946,7 +4964,6 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
|
|||
* If plane is moved or resized, also add old bounding box to dirty
|
||||
* rects.
|
||||
*/
|
||||
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
|
||||
fb_changed = old_plane_state->fb->base.id !=
|
||||
new_plane_state->fb->base.id;
|
||||
bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x ||
|
||||
|
@ -4954,36 +4971,51 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
|
|||
old_plane_state->crtc_w != new_plane_state->crtc_w ||
|
||||
old_plane_state->crtc_h != new_plane_state->crtc_h);
|
||||
|
||||
DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
|
||||
new_plane_state->plane->base.id,
|
||||
bb_changed, fb_changed, num_clips);
|
||||
drm_dbg(plane->dev,
|
||||
"[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
|
||||
new_plane_state->plane->base.id,
|
||||
bb_changed, fb_changed, num_clips);
|
||||
|
||||
if (num_clips || fb_changed || bb_changed) {
|
||||
dirty_rects[i].x = new_plane_state->crtc_x;
|
||||
dirty_rects[i].y = new_plane_state->crtc_y;
|
||||
dirty_rects[i].width = new_plane_state->crtc_w;
|
||||
dirty_rects[i].height = new_plane_state->crtc_h;
|
||||
DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
|
||||
new_plane_state->plane->base.id,
|
||||
dirty_rects[i].x, dirty_rects[i].y,
|
||||
dirty_rects[i].width, dirty_rects[i].height);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
/* Add old plane bounding-box if plane is moved or resized */
|
||||
if (bb_changed) {
|
||||
dirty_rects[i].x = old_plane_state->crtc_x;
|
||||
dirty_rects[i].y = old_plane_state->crtc_y;
|
||||
dirty_rects[i].width = old_plane_state->crtc_w;
|
||||
dirty_rects[i].height = old_plane_state->crtc_h;
|
||||
DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
|
||||
old_plane_state->plane->base.id,
|
||||
dirty_rects[i].x, dirty_rects[i].y,
|
||||
dirty_rects[i].width, dirty_rects[i].height);
|
||||
i += 1;
|
||||
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
|
||||
new_plane_state->crtc_x,
|
||||
new_plane_state->crtc_y,
|
||||
new_plane_state->crtc_w,
|
||||
new_plane_state->crtc_h, &i, false);
|
||||
|
||||
/* Add old plane bounding-box if plane is moved or resized */
|
||||
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
|
||||
old_plane_state->crtc_x,
|
||||
old_plane_state->crtc_y,
|
||||
old_plane_state->crtc_w,
|
||||
old_plane_state->crtc_h, &i, false);
|
||||
}
|
||||
|
||||
if (num_clips) {
|
||||
for (; i < num_clips; clips++)
|
||||
fill_dc_dirty_rect(new_plane_state->plane,
|
||||
&dirty_rects[i], clips->x1,
|
||||
clips->y1, clips->x2 - clips->x1,
|
||||
clips->y2 - clips->y1, &i, false);
|
||||
} else if (fb_changed && !bb_changed) {
|
||||
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
|
||||
new_plane_state->crtc_x,
|
||||
new_plane_state->crtc_y,
|
||||
new_plane_state->crtc_w,
|
||||
new_plane_state->crtc_h, &i, false);
|
||||
}
|
||||
|
||||
if (i > DC_MAX_DIRTY_RECTS)
|
||||
goto ffu;
|
||||
|
||||
flip_addrs->dirty_rect_count = i;
|
||||
return;
|
||||
|
||||
ffu:
|
||||
fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0,
|
||||
dm_crtc_state->base.mode.crtc_hdisplay,
|
||||
dm_crtc_state->base.mode.crtc_vdisplay,
|
||||
&flip_addrs->dirty_rect_count, true);
|
||||
}
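The dirty-rect helpers above convert DRM damage clips, given as (x1, y1, x2, y2) corners, into the x/y/width/height form DC expects, and fall back to a full-frame update when there are no clips or too many of them. A minimal standalone model of that conversion, with invented struct names standing in for struct drm_mode_rect and DC's struct rect:

#include <stdio.h>

#define MAX_RECTS 3     /* stand-in for DC_MAX_DIRTY_RECTS */

struct clip  { int x1, y1, x2, y2; };           /* like struct drm_mode_rect */
struct drect { int x, y, width, height; };      /* like DC's struct rect */

/* Returns the number of rects written, or 0 to request a full-frame update. */
static int clips_to_rects(const struct clip *clips, int num_clips,
                          struct drect *out)
{
        int i;

        if (!num_clips || num_clips > MAX_RECTS)
                return 0;       /* caller falls back to a full-frame ("FFU") rect */

        for (i = 0; i < num_clips; i++) {
                out[i].x = clips[i].x1;
                out[i].y = clips[i].y1;
                out[i].width = clips[i].x2 - clips[i].x1;
                out[i].height = clips[i].y2 - clips[i].y1;
        }
        return num_clips;
}

int main(void)
{
        struct clip c[] = { { 10, 20, 110, 220 } };
        struct drect r[MAX_RECTS];
        int n = clips_to_rects(c, 1, r);

        printf("%d rect(s), first is %dx%d at (%d,%d)\n",
               n, r[0].width, r[0].height, r[0].x, r[0].y);
        return 0;
}
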
|
||||
|
||||
static void update_stream_scaling_settings(const struct drm_display_mode *mode,
|
||||
|
@ -10204,91 +10236,92 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
|
|||
return value;
|
||||
}
|
||||
|
||||
static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux,
|
||||
struct dc_context *ctx,
|
||||
uint8_t status_type,
|
||||
uint32_t *operation_result)
|
||||
int amdgpu_dm_process_dmub_aux_transfer_sync(
|
||||
struct dc_context *ctx,
|
||||
unsigned int link_index,
|
||||
struct aux_payload *payload,
|
||||
enum aux_return_code_type *operation_result)
|
||||
{
|
||||
struct amdgpu_device *adev = ctx->driver_context;
|
||||
int return_status = -1;
|
||||
struct dmub_notification *p_notify = adev->dm.dmub_notify;
|
||||
int ret = -1;
|
||||
|
||||
if (is_cmd_aux) {
|
||||
if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
|
||||
return_status = p_notify->aux_reply.length;
|
||||
*operation_result = p_notify->result;
|
||||
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
|
||||
*operation_result = AUX_RET_ERROR_TIMEOUT;
|
||||
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
|
||||
*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
|
||||
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) {
|
||||
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
|
||||
} else {
|
||||
*operation_result = AUX_RET_ERROR_UNKNOWN;
|
||||
}
|
||||
} else {
|
||||
if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
|
||||
return_status = 0;
|
||||
*operation_result = p_notify->sc_status;
|
||||
} else {
|
||||
*operation_result = SET_CONFIG_UNKNOWN_ERROR;
|
||||
}
|
||||
mutex_lock(&adev->dm.dpia_aux_lock);
|
||||
if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
|
||||
*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
|
||||
DRM_ERROR("wait_for_completion_timeout timeout!");
|
||||
*operation_result = AUX_RET_ERROR_TIMEOUT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return return_status;
|
||||
if (p_notify->result != AUX_RET_SUCCESS) {
|
||||
/*
|
||||
* Transient states before tunneling is enabled could
|
||||
* lead to this error. We can ignore this for now.
|
||||
*/
|
||||
if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
|
||||
DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
|
||||
payload->address, payload->length,
|
||||
p_notify->result);
|
||||
}
|
||||
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
|
||||
if (!payload->write && p_notify->aux_reply.length &&
|
||||
(payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
|
||||
|
||||
if (payload->length != p_notify->aux_reply.length) {
|
||||
DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
|
||||
p_notify->aux_reply.length,
|
||||
payload->address, payload->length);
|
||||
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memcpy(payload->data, p_notify->aux_reply.data,
|
||||
p_notify->aux_reply.length);
|
||||
}
|
||||
|
||||
/* success */
|
||||
ret = p_notify->aux_reply.length;
|
||||
*operation_result = p_notify->result;
|
||||
out:
|
||||
mutex_unlock(&adev->dm.dpia_aux_lock);
|
||||
return ret;
|
||||
}
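The rewritten DPIA AUX path above serializes requests with a mutex, fires the asynchronous DMUB transfer, waits on a completion with a timeout, and only then inspects the notification and copies the reply back. The toy sketch below models that control flow with stand-in types; the completion, notification, and error handling are simplified placeholders, not the kernel or amdgpu_dm objects:

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for the kernel completion and DMUB notification objects. */
struct toy_completion { bool done; };
struct toy_notify { int result; int reply_len; };

static bool toy_wait_for_completion(struct toy_completion *c)
{
        return c->done; /* the real code waits up to 10 * HZ jiffies */
}

static int toy_aux_transfer_sync(struct toy_completion *c, struct toy_notify *n,
                                 bool fire_ok)
{
        int ret = -1;

        /* mutex_lock(&dpia_aux_lock): one transfer in flight at a time */
        if (!fire_ok)
                goto out;               /* engine-acquire error */
        if (!toy_wait_for_completion(c))
                goto out;               /* timeout error */
        if (n->result != 0)
                goto out;               /* invalid-reply error */
        ret = n->reply_len;             /* success: AUX reply length */
out:
        /* mutex_unlock(&dpia_aux_lock) */
        return ret;
}

int main(void)
{
        struct toy_completion c = { .done = true };
        struct toy_notify n = { .result = 0, .reply_len = 16 };

        printf("ret = %d\n", toy_aux_transfer_sync(&c, &n, true));
        return 0;
}
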
|
||||
|
||||
int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
|
||||
unsigned int link_index, void *cmd_payload, void *operation_result)
|
||||
int amdgpu_dm_process_dmub_set_config_sync(
|
||||
struct dc_context *ctx,
|
||||
unsigned int link_index,
|
||||
struct set_config_cmd_payload *payload,
|
||||
enum set_config_status *operation_result)
|
||||
{
|
||||
struct amdgpu_device *adev = ctx->driver_context;
|
||||
int ret = 0;
|
||||
bool is_cmd_complete;
|
||||
int ret;
|
||||
|
||||
if (is_cmd_aux) {
|
||||
dc_process_dmub_aux_transfer_async(ctx->dc,
|
||||
link_index, (struct aux_payload *)cmd_payload);
|
||||
} else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
|
||||
(struct set_config_cmd_payload *)cmd_payload,
|
||||
adev->dm.dmub_notify)) {
|
||||
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
|
||||
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
|
||||
(uint32_t *)operation_result);
|
||||
}
|
||||
mutex_lock(&adev->dm.dpia_aux_lock);
|
||||
is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
|
||||
link_index, payload, adev->dm.dmub_notify);
|
||||
|
||||
ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
|
||||
if (ret == 0) {
|
||||
if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
|
||||
ret = 0;
|
||||
*operation_result = adev->dm.dmub_notify->sc_status;
|
||||
} else {
|
||||
DRM_ERROR("wait_for_completion_timeout timeout!");
|
||||
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
|
||||
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
|
||||
(uint32_t *)operation_result);
|
||||
ret = -1;
|
||||
*operation_result = SET_CONFIG_UNKNOWN_ERROR;
|
||||
}
|
||||
|
||||
if (is_cmd_aux) {
|
||||
if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
|
||||
struct aux_payload *payload = (struct aux_payload *)cmd_payload;
|
||||
|
||||
payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
|
||||
if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
|
||||
payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
|
||||
|
||||
if (payload->length != adev->dm.dmub_notify->aux_reply.length) {
|
||||
DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n",
|
||||
payload->address, payload->length,
|
||||
adev->dm.dmub_notify->aux_reply.length);
|
||||
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx,
|
||||
DMUB_ASYNC_TO_SYNC_ACCESS_INVALID,
|
||||
(uint32_t *)operation_result);
|
||||
}
|
||||
|
||||
memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
|
||||
adev->dm.dmub_notify->aux_reply.length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
|
||||
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
|
||||
(uint32_t *)operation_result);
|
||||
mutex_unlock(&adev->dm.dpia_aux_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -59,7 +59,9 @@
|
|||
#include "signal_types.h"
|
||||
#include "amdgpu_dm_crc.h"
|
||||
struct aux_payload;
|
||||
struct set_config_cmd_payload;
|
||||
enum aux_return_code_type;
|
||||
enum set_config_status;
|
||||
|
||||
/* Forward declarations */
|
||||
struct amdgpu_device;
|
||||
|
@ -542,6 +544,13 @@ struct amdgpu_display_manager {
|
|||
* occurred on certain intel platform
|
||||
*/
|
||||
bool aux_hpd_discon_quirk;
|
||||
|
||||
/**
|
||||
* @dpia_aux_lock:
|
||||
*
|
||||
* Guards access to DPIA AUX
|
||||
*/
|
||||
struct mutex dpia_aux_lock;
|
||||
};
|
||||
|
||||
enum dsc_clock_force_state {
|
||||
|
@ -785,9 +794,11 @@ void amdgpu_dm_update_connector_after_detect(
|
|||
|
||||
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
|
||||
|
||||
int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
|
||||
struct dc_context *ctx, unsigned int link_index,
|
||||
void *payload, void *operation_result);
|
||||
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
|
||||
struct aux_payload *payload, enum aux_return_code_type *operation_result);
|
||||
|
||||
int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
|
||||
struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
|
||||
|
||||
bool check_seamless_boot_capability(struct amdgpu_device *adev);
|
||||
|
||||
|
|
|
@ -2639,6 +2639,25 @@ static int dp_mst_progress_status_show(struct seq_file *m, void *unused)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Reports whether the connected display is a USB4 DPIA tunneled display
 * Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link
 */
static int is_dpia_link_show(struct seq_file *m, void *data)
{
        struct drm_connector *connector = m->private;
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
        struct dc_link *link = aconnector->dc_link;

        if (connector->status != connector_status_connected)
                return -ENODEV;

        seq_printf(m, "%s\n", (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? "yes" :
                        (link->ep_type == DISPLAY_ENDPOINT_PHY) ? "no" : "unknown");

        return 0;
}

DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
|
||||
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
|
||||
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
|
||||
|
@ -2650,6 +2669,7 @@ DEFINE_SHOW_ATTRIBUTE(internal_display);
|
|||
DEFINE_SHOW_ATTRIBUTE(psr_capability);
|
||||
DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
|
||||
DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
|
||||
DEFINE_SHOW_ATTRIBUTE(is_dpia_link);
|
||||
|
||||
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
|
@ -2794,7 +2814,8 @@ static const struct {
|
|||
{"max_bpc", &dp_max_bpc_debugfs_fops},
|
||||
{"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops},
|
||||
{"is_mst_connector", &dp_is_mst_connector_fops},
|
||||
{"mst_progress_status", &dp_mst_progress_status_fops}
|
||||
{"mst_progress_status", &dp_mst_progress_status_fops},
|
||||
{"is_dpia_link", &is_dpia_link_fops}
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DRM_AMD_DC_HDCP
|
||||
|
|
|
@ -817,9 +817,8 @@ int dm_helper_dmub_aux_transfer_sync(
|
|||
struct aux_payload *payload,
|
||||
enum aux_return_code_type *operation_result)
|
||||
{
|
||||
return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
|
||||
link->link_index, (void *)payload,
|
||||
(void *)operation_result);
|
||||
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
|
||||
operation_result);
|
||||
}
|
||||
|
||||
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
|
||||
|
@ -827,9 +826,8 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
|
|||
struct set_config_cmd_payload *payload,
|
||||
enum set_config_status *operation_result)
|
||||
{
|
||||
return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
|
||||
link->link_index, (void *)payload,
|
||||
(void *)operation_result);
|
||||
return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
|
||||
operation_result);
|
||||
}
|
||||
|
||||
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
|
||||
|
|
|
@ -1600,6 +1600,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
|
|||
drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
|
||||
supported_rotations);
|
||||
|
||||
if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) &&
|
||||
plane->type != DRM_PLANE_TYPE_CURSOR)
|
||||
drm_plane_enable_fb_damage_clips(plane);
|
||||
|
||||
drm_plane_helper_add(plane, &dm_plane_helper_funcs);
|
||||
|
||||
#ifdef CONFIG_DRM_AMD_DC_HDR
|
||||
|
|
|
@ -438,7 +438,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
|
|||
}
|
||||
|
||||
if (!new_clocks->dtbclk_en) {
|
||||
new_clocks->ref_dtbclk_khz = 0;
|
||||
new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
|
||||
}
|
||||
|
||||
/* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
|
||||
|
|
|
@ -3061,7 +3061,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
* Ensures that we have enough pipes for newly added MPO planes
*/
if (dc->res_pool->funcs->remove_phantom_pipes)
dc->res_pool->funcs->remove_phantom_pipes(dc, context);
dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);

/*remove old surfaces from context */
if (!dc_rem_all_planes_for_stream(dc, stream, context)) {

@ -3954,6 +3954,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
struct dc_state *context;
enum surface_update_type update_type;
int i;
struct mall_temp_config mall_temp_config;

/* In cases where MPO and split or ODM are used transitions can
* cause underflow. Apply stream configuration with minimal pipe

@ -3985,11 +3986,29 @@ bool dc_update_planes_and_stream(struct dc *dc,

/* on plane removal, minimal state is the new one */
if (force_minimal_pipe_splitting && !is_plane_addition) {
/* Since all phantom pipes are removed in full validation,
* we have to save and restore the subvp/mall config when
* we do a minimal transition since the flags marking the
* pipe as subvp/phantom will be cleared (dc copy constructor
* creates a shallow copy).
*/
if (dc->res_pool->funcs->save_mall_state)
dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
if (!commit_minimal_transition_state(dc, context)) {
dc_release_state(context);
return false;
}
if (dc->res_pool->funcs->restore_mall_state)
dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);

/* If we do a minimal transition with plane removal and the context
* has subvp we also have to retain back the phantom stream / planes
* since the refcount is decremented as part of the min transition
* (we commit a state with no subvp, so the phantom streams / planes
* had to be removed).
*/
if (dc->res_pool->funcs->retain_phantom_pipes)
dc->res_pool->funcs->retain_phantom_pipes(dc, context);
update_type = UPDATE_TYPE_FULL;
}

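Editor's note: the save/restore dance above is needed because commit_minimal_transition_state() runs full validation, which strips the SubVP/phantom markings from the shallow-copied state. Below is a minimal, hedged sketch of what the two optional hooks are expected to do, assuming the per-pipe layout used elsewhere in dc; names prefixed example_ are hypothetical, the real implementations being dcn32_save_mall_state()/dcn32_restore_mall_state().

/* Sketch only: snapshot the MALL/SubVP markings per pipe, then put them back. */
static void example_save_mall_state(struct dc *dc, struct dc_state *context,
                                    struct mall_temp_config *temp)
{
        int i;

        for (i = 0; i < dc->res_pool->pipe_count; i++) {
                struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];

                if (pipe->stream)
                        temp->mall_stream_config[i] = pipe->stream->mall_stream_config;
                if (pipe->plane_state)
                        temp->is_phantom_plane[i] = pipe->plane_state->is_phantom;
        }
}

static void example_restore_mall_state(struct dc *dc, struct dc_state *context,
                                       struct mall_temp_config *temp)
{
        int i;

        for (i = 0; i < dc->res_pool->pipe_count; i++) {
                struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];

                if (pipe->stream)
                        pipe->stream->mall_stream_config = temp->mall_stream_config[i];
                if (pipe->plane_state)
                        pipe->plane_state->is_phantom = temp->is_phantom_plane[i];
        }
}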
@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;

#define DC_VER "3.2.213"
#define DC_VER "3.2.214"

#define MAX_SURFACES 3
#define MAX_PLANES 6

@ -160,6 +160,17 @@ struct mall_stream_config {
struct dc_stream_state *paired_stream; // master / slave stream
};

/* Temp struct used to save and restore MALL config
* during validation.
*
* TODO: Move MALL config into dc_state instead of stream struct
* to avoid needing to save/restore.
*/
struct mall_temp_config {
struct mall_stream_config mall_stream_config[MAX_PIPES];
bool is_phantom_plane[MAX_PIPES];
};

struct dc_stream_state {
// sink is deprecated, new code should not reference
// this pointer

@ -869,6 +869,32 @@ static void false_optc_underflow_wa(
tg->funcs->clear_optc_underflow(tg);
}

static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
{
struct pipe_ctx *other_pipe;
int vready_offset = pipe->pipe_dlg_param.vready_offset;

/* Always use the largest vready_offset of all connected pipes */
for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}

return vready_offset;
}

enum dc_status dcn10_enable_stream_timing(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,

@ -912,7 +938,7 @@ enum dc_status dcn10_enable_stream_timing(
pipe_ctx->stream_res.tg->funcs->program_timing(
pipe_ctx->stream_res.tg,
&stream->timing,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width,

@ -2908,7 +2934,7 @@ void dcn10_program_pipe(

pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);

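Editor's note: the helper just takes the maximum vready_offset across every pipe linked to the given one (MPC splits via top/bottom_pipe, ODM combine via next/prev_odm_pipe), so the whole group is programmed with one consistent value. A self-contained, hypothetical illustration of that rule, with fabricated offsets and only one link direction modelled:

#include <stdio.h>

/* Toy stand-in for a linked pipe group; values are made up. */
struct toy_pipe {
        int vready_offset;
        struct toy_pipe *bottom_pipe;
};

static int group_vready_offset(const struct toy_pipe *pipe)
{
        int vready = pipe->vready_offset;
        const struct toy_pipe *other;

        /* Always use the largest vready_offset of all connected pipes. */
        for (other = pipe->bottom_pipe; other; other = other->bottom_pipe)
                if (other->vready_offset > vready)
                        vready = other->vready_offset;
        return vready;
}

int main(void)
{
        struct toy_pipe p2 = { .vready_offset = 14, .bottom_pipe = NULL };
        struct toy_pipe p1 = { .vready_offset = 9,  .bottom_pipe = &p2 };
        struct toy_pipe p0 = { .vready_offset = 12, .bottom_pipe = &p1 };

        /* Every pipe in the {12, 9, 14} group gets programmed with 14. */
        printf("group vready_offset = %d\n", group_vready_offset(&p0));
        return 0;
}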
@ -1652,6 +1652,31 @@ static void dcn20_update_dchubp_dpp(
hubp->funcs->phantom_hubp_post_enable(hubp);
}

static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
{
struct pipe_ctx *other_pipe;
int vready_offset = pipe->pipe_dlg_param.vready_offset;

/* Always use the largest vready_offset of all connected pipes */
for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}
for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
vready_offset = other_pipe->pipe_dlg_param.vready_offset;
}

return vready_offset;
}

static void dcn20_program_pipe(
struct dc *dc,

@ -1670,7 +1695,7 @@ static void dcn20_program_pipe(
&& !pipe_ctx->prev_odm_pipe) {
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);

@ -1716,7 +1741,10 @@ static void dcn20_program_pipe(
* only do gamma programming for powering on, internal memcmp to avoid
* updating on slave planes
*/
if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf)
if (pipe_ctx->update_flags.bits.enable ||
pipe_ctx->update_flags.bits.plane_changed ||
pipe_ctx->stream->update_flags.bits.out_tf ||
pipe_ctx->plane_state->update_flags.bits.output_tf_change)
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);

/* If the pipe has been enabled or has a different opp, we

@ -2067,7 +2095,7 @@ bool dcn20_update_bandwidth(

pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
pipe_ctx->pipe_dlg_param.vready_offset,
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);

@ -225,11 +225,7 @@ static void dccg32_set_dtbclk_dto(
} else {
REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
DTBCLK_DTO_ENABLE[params->otg_inst], 0,
PIPE_DTO_SRC_SEL[params->otg_inst], 1);
if (params->is_hdmi)
REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
PIPE_DTO_SRC_SEL[params->otg_inst], 0);

PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
}

@ -262,11 +262,11 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);

/* For DCC:
* meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1)
/*For DCC:
* meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
*/
if (pipe->plane_state->dcc.enable)
num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel +
num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);

bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;

@ -316,8 +316,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) /
dc->caps.cache_line_size + 2;
break;
}
break;
}
}

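Editor's note: the corrected DCC term implements meta_num_mblk = CEILING(meta_pitch * full_vp_height * Bpe / 256 / mblk_bytes, 1) with the usual (x + d - 1) / d integer-ceiling idiom. A standalone worked example with made-up surface numbers, assuming the 64 KiB MALL macroblock size that DCN3_2_MALL_MBLK_SIZE_BYTES is understood to denote:

#include <stdio.h>

#define MBLK_SIZE_BYTES 65536u /* assumed value of DCN3_2_MALL_MBLK_SIZE_BYTES */

int main(void)
{
        unsigned int meta_pitch = 512;       /* hypothetical DCC meta pitch    */
        unsigned int vp_height = 2160;       /* hypothetical viewport height   */
        unsigned int bytes_per_pixel = 4;

        /* meta_num_mblk = CEILING(meta_pitch * full_vp_height * Bpe / 256 / mblk_bytes, 1) */
        unsigned int meta_bytes = meta_pitch * vp_height * bytes_per_pixel;
        unsigned int meta_num_mblk =
                (meta_bytes + (256u * MBLK_SIZE_BYTES) - 1) / (256u * MBLK_SIZE_BYTES);

        printf("meta_num_mblk = %u\n", meta_num_mblk); /* 1 for these numbers */
        return 0;
}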
@ -1743,7 +1743,7 @@ void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
}

// return true if removed piped from ctx, false otherwise
bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
{
int i;
bool removed_pipe = false;

@ -1770,14 +1770,23 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
removed_pipe = true;
}

// Clear all phantom stream info
if (pipe->stream) {
pipe->stream->mall_stream_config.type = SUBVP_NONE;
pipe->stream->mall_stream_config.paired_stream = NULL;
}
/* For non-full updates, a shallow copy of the current state
* is created. In this case we don't want to erase the current
* state (there can be 2 HIRQL threads, one in flip, and one in
* checkMPO) that can cause a race condition.
*
* This is just a workaround, needs a proper fix.
*/
if (!fast_update) {
// Clear all phantom stream info
if (pipe->stream) {
pipe->stream->mall_stream_config.type = SUBVP_NONE;
pipe->stream->mall_stream_config.paired_stream = NULL;
}

if (pipe->plane_state) {
pipe->plane_state->is_phantom = false;
if (pipe->plane_state) {
pipe->plane_state->is_phantom = false;
}
}
}
return removed_pipe;

@ -1950,23 +1959,28 @@ int dcn32_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;

switch (pipe->stream->mall_stream_config.type) {
case SUBVP_MAIN:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
subvp_in_use = true;
break;
case SUBVP_PHANTOM:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
// Disallow unbounded req for SubVP according to DCHUB programming guide
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
break;
case SUBVP_NONE:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
break;
default:
break;
/* Only populate DML input with subvp info for full updates.
* This is just a workaround -- needs a proper fix.
*/
if (!fast_validate) {
switch (pipe->stream->mall_stream_config.type) {
case SUBVP_MAIN:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
subvp_in_use = true;
break;
case SUBVP_PHANTOM:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
// Disallow unbounded req for SubVP according to DCHUB programming guide
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
break;
case SUBVP_NONE:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
break;
default:
break;
}
}

pipes[pipe_cnt].dout.dsc_input_bpc = 0;

@ -2055,6 +2069,8 @@ static struct resource_funcs dcn32_res_pool_funcs = {
.add_phantom_pipes = dcn32_add_phantom_pipes,
.remove_phantom_pipes = dcn32_remove_phantom_pipes,
.retain_phantom_pipes = dcn32_retain_phantom_pipes,
.save_mall_state = dcn32_save_mall_state,
.restore_mall_state = dcn32_restore_mall_state,
};

@ -45,17 +45,6 @@
extern struct _vcs_dpi_ip_params_st dcn3_2_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc;

/* Temp struct used to save and restore MALL config
* during validation.
*
* TODO: Move MALL config into dc_state instead of stream struct
* to avoid needing to save/restore.
*/
struct mall_temp_config {
struct mall_stream_config mall_stream_config[MAX_PIPES];
bool is_phantom_plane[MAX_PIPES];
};

struct dcn32_resource_pool {
struct resource_pool base;
};

@ -81,7 +70,7 @@ bool dcn32_release_post_bldn_3dlut(
struct dc_transfer_func **shaper);

bool dcn32_remove_phantom_pipes(struct dc *dc,
struct dc_state *context);
struct dc_state *context, bool fast_update);

void dcn32_retain_phantom_pipes(struct dc *dc,
struct dc_state *context);

@ -97,14 +97,14 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
* FLOOR(vp_x_start, blk_width)
*/
full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);

/* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
* FLOOR(vp_y_start, blk_height)
*/
full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
full_vp_height + mblk_height - 1) / mblk_height * mblk_height) +
full_vp_height + mblk_height - 1) / mblk_height * mblk_height) -
(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);

/* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */

@ -121,14 +121,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
*/
num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);

/*For DCC:
* meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
*/
if (pipe->plane_state->dcc.enable)
num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);

bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
// cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
// (MALL is 64-byte aligned)
cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;

/* For DCC divide by 256 */
if (pipe->plane_state->dcc.enable)
cache_lines_per_plane = cache_lines_per_plane + (cache_lines_per_plane / 256) + 1;
cache_lines_used += cache_lines_per_plane;
}
}

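Editor's note: the sign fix in the block-aligned viewport math matters. The aligned span is FLOOR(vp_start + size + blk - 1, blk) minus FLOOR(vp_start, blk); adding the second term instead of subtracting it inflates the MALL allocation whenever the viewport does not start on a macroblock boundary. A quick standalone check with made-up numbers:

#include <stdio.h>

static unsigned int floor_align(unsigned int v, unsigned int blk)
{
        return v / blk * blk;
}

int main(void)
{
        unsigned int vp_x = 200, vp_width = 300, mblk_width = 128; /* hypothetical */

        unsigned int hi = floor_align(vp_x + vp_width + mblk_width - 1, mblk_width); /* 512 */
        unsigned int lo = floor_align(vp_x, mblk_width);                             /* 128 */

        printf("correct (hi - lo) = %u\n", hi - lo); /* 384: blocks covering x = 128..511 */
        printf("old bug (hi + lo) = %u\n", hi + lo); /* 640: the pre-fix over-estimate    */
        return 0;
}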
@ -1622,6 +1622,8 @@ static struct resource_funcs dcn321_res_pool_funcs = {
.add_phantom_pipes = dcn32_add_phantom_pipes,
.remove_phantom_pipes = dcn32_remove_phantom_pipes,
.retain_phantom_pipes = dcn32_retain_phantom_pipes,
.save_mall_state = dcn32_save_mall_state,
.restore_mall_state = dcn32_restore_mall_state,
};

@ -559,6 +559,9 @@ void dcn31_calculate_wm_and_dlg_fp(
context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].stream)
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
}
}

@ -1203,7 +1203,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
// remove phantom pipes and repopulate dml pipes
if (!found_supported_config) {
dc->res_pool->funcs->remove_phantom_pipes(dc, context);
dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);

@ -1320,7 +1320,10 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,

if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
if (context->res_ctx.pipe_ctx[i].plane_state)
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
else
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
pipe_idx++;
}

@ -1515,7 +1518,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
return false;

// For each full update, remove all existing phantom pipes first
dc->res_pool->funcs->remove_phantom_pipes(dc, context);
dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);

dc->res_pool->funcs->update_soc_for_wm_a(dc, context);

@ -240,9 +240,11 @@ struct resource_funcs {
unsigned int pipe_cnt,
unsigned int index);

bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context);
bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
};

struct audio_support{

@ -222,7 +222,11 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
uint32_t second_gfx_pipe_enabled : 1;
uint32_t enable_level_process_quantum_check : 1;
uint32_t reserved : 25;
uint32_t legacy_sch_mode : 1;
uint32_t disable_add_queue_wptr_mc_addr : 1;
uint32_t enable_mes_event_int_logging : 1;
uint32_t enable_reg_active_poll : 1;
uint32_t reserved : 21;
};
uint32_t uint32_t_all;
};

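Editor's note: the four new MES flags are carved out of the reserved field so the 32-bit flag word of union MESAPI_SET_HW_RESOURCES keeps its size, which the firmware interface depends on. The bit arithmetic, as a trivially checkable C11 snippet (the macro names are illustrative only):

#include <assert.h>

/* Four new 1-bit flags come out of the old 25-bit reserved field, leaving
 * 21 reserved bits and keeping the word at exactly 32 bits.
 */
#define OLD_RESERVED_BITS 25
#define NEW_FLAG_BITS      4
#define NEW_RESERVED_BITS 21

static_assert(OLD_RESERVED_BITS - NEW_FLAG_BITS == NEW_RESERVED_BITS,
              "reserved field must shrink by exactly the number of new flags");

int main(void) { return 0; }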
@ -161,7 +161,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu,

int smu_set_gfx_power_up_by_imu(struct smu_context *smu)
{
if (!smu->ppt_funcs && !smu->ppt_funcs->set_gfx_power_up_by_imu)
if (!smu->ppt_funcs || !smu->ppt_funcs->set_gfx_power_up_by_imu)
return -EOPNOTSUPP;

return smu->ppt_funcs->set_gfx_power_up_by_imu(smu);

@ -585,6 +585,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
yellow_carp_set_ppt_funcs(smu);
break;
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
smu_v13_0_4_set_ppt_funcs(smu);
break;
case IP_VERSION(13, 0, 5):

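Editor's note: the one-character change from && to || in smu_set_gfx_power_up_by_imu() is the whole fix. With &&, a valid ppt_funcs table whose set_gfx_power_up_by_imu callback is NULL would pass the guard and the NULL function pointer would be called. A reduced userspace illustration of the corrected guard (struct and names here are hypothetical):

#include <stdio.h>
#include <errno.h>

struct ops { int (*cb)(void); };

/* '||' rejects both a missing ops table and a missing callback;
 * '&&' only rejected the case where both were missing at once.
 */
static int call_cb(struct ops *ops)
{
        if (!ops || !ops->cb)
                return -EOPNOTSUPP;
        return ops->cb();
}

int main(void)
{
        struct ops empty = { .cb = NULL };

        printf("%d\n", call_cb(NULL));   /* -EOPNOTSUPP */
        printf("%d\n", call_cb(&empty)); /* -EOPNOTSUPP, no crash */
        return 0;
}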
@ -79,6 +79,17 @@ MODULE_FIRMWARE("amdgpu/beige_goby_smc.bin");
#define mmTHM_BACO_CNTL_ARCT 0xA7
#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0

static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
uint32_t data, loop = 0;

do {
usleep_range(1000, 1100);
data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
} while ((data & 0x100) && (++loop < 100));
}

int smu_v11_0_init_microcode(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;

@ -1588,6 +1599,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu)
if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
return false;

/* return true if ASIC is in BACO state already */
if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
return true;

/* Arcturus does not support this bit mask */
if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))

@ -1685,7 +1700,18 @@ int smu_v11_0_baco_enter(struct smu_context *smu)

int smu_v11_0_baco_exit(struct smu_context *smu)
{
return smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
int ret;

ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
if (!ret) {
/*
* Poll BACO exit status to ensure FW has completed
* BACO exit process to avoid timing issues.
*/
smu_v11_0_poll_baco_exit(smu);
}

return ret;
}

int smu_v11_0_mode1_reset(struct smu_context *smu)

@ -250,6 +250,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)

switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
mp1_fw_flags = RREG32_PCIE(MP1_Public |
(smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff));
break;

@ -301,6 +302,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP;
break;
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4;
break;
case IP_VERSION(13, 0, 5):

@ -841,6 +843,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable)
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return 0;
if (enable)

@ -1026,6 +1026,15 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
.set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu,
};

static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;

smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
}

void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;

@ -1035,7 +1044,9 @@ void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
smu->feature_map = smu_v13_0_4_feature_mask_map;
smu->table_map = smu_v13_0_4_table_map;
smu->is_apu = true;
smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);

if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4))
smu_v13_0_4_set_smu_mailbox_registers(smu);
else
smu_v13_0_set_smu_mailbox_registers(smu);
}

@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)

if (!found)
return false;
pci_dev_put(pdev);

rdev->bios = kmalloc(size, GFP_KERNEL);
if (!rdev->bios) {

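Editor's note: the added pci_dev_put() balances the reference that pci_get_class() leaves on whichever device the scan loop stops on. A kernel-style sketch of the pattern follows; it is not the literal radeon_atrm_get_bios() body, and the matching predicate is hypothetical.

/* Sketch only. pci_get_class() returns the next matching device with its
 * refcount raised and drops the reference on the cursor device passed in,
 * so breaking out of the loop leaves exactly one reference held: the one
 * on the matched device, which must be released when it is no longer used.
 */
static bool example_scan_vga_devices(void)
{
        struct pci_dev *pdev = NULL;
        bool found = false;

        while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
                if (example_device_matches(pdev)) {     /* hypothetical check */
                        found = true;
                        break;
                }
        }

        if (!found)
                return false;

        /* Everything needed from pdev was captured inside the loop, so the
         * reference can be dropped right away -- the line this patch adds.
         */
        pci_dev_put(pdev);
        return true;
}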