iommu/arm-smmu-v3: Add second level of context descriptor table

The SMMU can support up to 20 bits of SSID. Add a second level of page
tables to accommodate this. Devices that support more than 1024 SSIDs now
have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
descriptors (64kB), allocated on demand.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
Jean-Philippe Brucker 2020-01-15 13:52:36 +01:00 коммит произвёл Will Deacon
Родитель 492ddc79e0
Коммит 73af06f589
1 изменённых файлов: 134 добавлений и 8 удалений

Просмотреть файл

@ -223,6 +223,7 @@
#define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
#define STRTAB_STE_0_S1FMT_LINEAR 0
#define STRTAB_STE_0_S1FMT_64K_L2 2
#define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
#define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
@ -269,7 +270,20 @@
#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
/* Context descriptor (stage-1 only) */
/*
* Context descriptors.
*
* Linear: when less than 1024 SSIDs are supported
* 2lvl: at most 1024 L1 entries,
* 1024 lazy entries per table.
*/
#define CTXDESC_SPLIT 10
#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT)
#define CTXDESC_L1_DESC_DWORDS 1
#define CTXDESC_L1_DESC_V (1UL << 0)
#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12)
#define CTXDESC_CD_DWORDS 8
#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
@ -558,9 +572,15 @@ struct arm_smmu_ctx_desc {
u64 mair;
};
struct arm_smmu_l1_ctx_desc {
__le64 *l2ptr;
dma_addr_t l2ptr_dma;
};
struct arm_smmu_ctx_desc_cfg {
__le64 *cdtab;
dma_addr_t cdtab_dma;
struct arm_smmu_l1_ctx_desc *l1_desc;
unsigned int num_l1_ents;
};
@ -1490,6 +1510,57 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
arm_smmu_cmdq_issue_sync(smmu);
}
static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
struct arm_smmu_l1_ctx_desc *l1_desc)
{
size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
&l1_desc->l2ptr_dma, GFP_KERNEL);
if (!l1_desc->l2ptr) {
dev_warn(smmu->dev,
"failed to allocate context descriptor table\n");
return -ENOMEM;
}
return 0;
}
static void arm_smmu_write_cd_l1_desc(__le64 *dst,
struct arm_smmu_l1_ctx_desc *l1_desc)
{
u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
CTXDESC_L1_DESC_V;
WRITE_ONCE(*dst, cpu_to_le64(val));
}
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
u32 ssid)
{
__le64 *l1ptr;
unsigned int idx;
struct arm_smmu_l1_ctx_desc *l1_desc;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
idx = ssid >> CTXDESC_SPLIT;
l1_desc = &cdcfg->l1_desc[idx];
if (!l1_desc->l2ptr) {
if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
return NULL;
l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
/* An invalid L1CD can be cached */
arm_smmu_sync_cd(smmu_domain, ssid, false);
}
idx = ssid & (CTXDESC_L2_ENTRIES - 1);
return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
int ssid, struct arm_smmu_ctx_desc *cd)
{
@ -1504,9 +1575,15 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
*/
u64 val;
bool cd_live;
__le64 *cdptr;
struct arm_smmu_device *smmu = smmu_domain->smmu;
__le64 *cdptr = smmu_domain->s1_cfg.cdcfg.cdtab + ssid *
CTXDESC_CD_DWORDS;
if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
return -E2BIG;
cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
if (!cdptr)
return -ENOMEM;
val = le64_to_cpu(cdptr[0]);
cd_live = !!(val & CTXDESC_CD_0_V);
@ -1562,29 +1639,78 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
{
int ret;
size_t l1size;
size_t max_contexts;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
max_contexts = 1 << cfg->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
cdcfg->num_l1_ents = max_contexts;
l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
} else {
cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
CTXDESC_L2_ENTRIES);
cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
sizeof(*cdcfg->l1_desc),
GFP_KERNEL);
if (!cdcfg->l1_desc)
return -ENOMEM;
l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
}
cdcfg->num_l1_ents = 1UL << cfg->s1cdmax;
l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
GFP_KERNEL);
if (!cdcfg->cdtab) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n");
return -ENOMEM;
ret = -ENOMEM;
goto err_free_l1;
}
return 0;
err_free_l1:
if (cdcfg->l1_desc) {
devm_kfree(smmu->dev, cdcfg->l1_desc);
cdcfg->l1_desc = NULL;
}
return ret;
}
static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
{
int i;
size_t size, l1size;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
size_t l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
if (cdcfg->l1_desc) {
size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
for (i = 0; i < cdcfg->num_l1_ents; i++) {
if (!cdcfg->l1_desc[i].l2ptr)
continue;
dmam_free_coherent(smmu->dev, size,
cdcfg->l1_desc[i].l2ptr,
cdcfg->l1_desc[i].l2ptr_dma);
}
devm_kfree(smmu->dev, cdcfg->l1_desc);
cdcfg->l1_desc = NULL;
l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
} else {
l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
}
dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
cdcfg->cdtab_dma = 0;