iommu/arm-smmu-v3: Move definitions to a header
author Jean-Philippe Brucker <jean-philippe@linaro.org>
Fri, 18 Sep 2020 10:18:47 +0000 (12:18 +0200)
committer Will Deacon <will@kernel.org>
Mon, 28 Sep 2020 22:48:06 +0000 (23:48 +0100)
Allow sharing structure definitions with the upcoming SVA support for
Arm SMMUv3, by moving them to a separate header. We could surgically
extract only what is needed but keeping all definitions in one place
looks nicer.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20200918101852.582559-8-jean-philippe@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h [new file with mode: 0644]

index 1ffec08d345ad3319aa799f8b5f9f16bedeca08c..b0d26c9f577daa5805cf63f8da007b80dd1e8ce5 100644 (file)
@@ -11,7 +11,6 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
-#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/crash_dump.h>
 #include <linux/delay.h>
@@ -19,7 +18,6 @@
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io-pgtable.h>
-#include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/msi.h>
 
 #include <linux/amba/bus.h>
 
-/* MMIO registers */
-#define ARM_SMMU_IDR0                  0x0
-#define IDR0_ST_LVL                    GENMASK(28, 27)
-#define IDR0_ST_LVL_2LVL               1
-#define IDR0_STALL_MODEL               GENMASK(25, 24)
-#define IDR0_STALL_MODEL_STALL         0
-#define IDR0_STALL_MODEL_FORCE         2
-#define IDR0_TTENDIAN                  GENMASK(22, 21)
-#define IDR0_TTENDIAN_MIXED            0
-#define IDR0_TTENDIAN_LE               2
-#define IDR0_TTENDIAN_BE               3
-#define IDR0_CD2L                      (1 << 19)
-#define IDR0_VMID16                    (1 << 18)
-#define IDR0_PRI                       (1 << 16)
-#define IDR0_SEV                       (1 << 14)
-#define IDR0_MSI                       (1 << 13)
-#define IDR0_ASID16                    (1 << 12)
-#define IDR0_ATS                       (1 << 10)
-#define IDR0_HYP                       (1 << 9)
-#define IDR0_COHACC                    (1 << 4)
-#define IDR0_TTF                       GENMASK(3, 2)
-#define IDR0_TTF_AARCH64               2
-#define IDR0_TTF_AARCH32_64            3
-#define IDR0_S1P                       (1 << 1)
-#define IDR0_S2P                       (1 << 0)
-
-#define ARM_SMMU_IDR1                  0x4
-#define IDR1_TABLES_PRESET             (1 << 30)
-#define IDR1_QUEUES_PRESET             (1 << 29)
-#define IDR1_REL                       (1 << 28)
-#define IDR1_CMDQS                     GENMASK(25, 21)
-#define IDR1_EVTQS                     GENMASK(20, 16)
-#define IDR1_PRIQS                     GENMASK(15, 11)
-#define IDR1_SSIDSIZE                  GENMASK(10, 6)
-#define IDR1_SIDSIZE                   GENMASK(5, 0)
-
-#define ARM_SMMU_IDR3                  0xc
-#define IDR3_RIL                       (1 << 10)
-
-#define ARM_SMMU_IDR5                  0x14
-#define IDR5_STALL_MAX                 GENMASK(31, 16)
-#define IDR5_GRAN64K                   (1 << 6)
-#define IDR5_GRAN16K                   (1 << 5)
-#define IDR5_GRAN4K                    (1 << 4)
-#define IDR5_OAS                       GENMASK(2, 0)
-#define IDR5_OAS_32_BIT                        0
-#define IDR5_OAS_36_BIT                        1
-#define IDR5_OAS_40_BIT                        2
-#define IDR5_OAS_42_BIT                        3
-#define IDR5_OAS_44_BIT                        4
-#define IDR5_OAS_48_BIT                        5
-#define IDR5_OAS_52_BIT                        6
-#define IDR5_VAX                       GENMASK(11, 10)
-#define IDR5_VAX_52_BIT                        1
-
-#define ARM_SMMU_CR0                   0x20
-#define CR0_ATSCHK                     (1 << 4)
-#define CR0_CMDQEN                     (1 << 3)
-#define CR0_EVTQEN                     (1 << 2)
-#define CR0_PRIQEN                     (1 << 1)
-#define CR0_SMMUEN                     (1 << 0)
-
-#define ARM_SMMU_CR0ACK                        0x24
-
-#define ARM_SMMU_CR1                   0x28
-#define CR1_TABLE_SH                   GENMASK(11, 10)
-#define CR1_TABLE_OC                   GENMASK(9, 8)
-#define CR1_TABLE_IC                   GENMASK(7, 6)
-#define CR1_QUEUE_SH                   GENMASK(5, 4)
-#define CR1_QUEUE_OC                   GENMASK(3, 2)
-#define CR1_QUEUE_IC                   GENMASK(1, 0)
-/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
-#define CR1_CACHE_NC                   0
-#define CR1_CACHE_WB                   1
-#define CR1_CACHE_WT                   2
-
-#define ARM_SMMU_CR2                   0x2c
-#define CR2_PTM                                (1 << 2)
-#define CR2_RECINVSID                  (1 << 1)
-#define CR2_E2H                                (1 << 0)
-
-#define ARM_SMMU_GBPA                  0x44
-#define GBPA_UPDATE                    (1 << 31)
-#define GBPA_ABORT                     (1 << 20)
-
-#define ARM_SMMU_IRQ_CTRL              0x50
-#define IRQ_CTRL_EVTQ_IRQEN            (1 << 2)
-#define IRQ_CTRL_PRIQ_IRQEN            (1 << 1)
-#define IRQ_CTRL_GERROR_IRQEN          (1 << 0)
-
-#define ARM_SMMU_IRQ_CTRLACK           0x54
-
-#define ARM_SMMU_GERROR                        0x60
-#define GERROR_SFM_ERR                 (1 << 8)
-#define GERROR_MSI_GERROR_ABT_ERR      (1 << 7)
-#define GERROR_MSI_PRIQ_ABT_ERR                (1 << 6)
-#define GERROR_MSI_EVTQ_ABT_ERR                (1 << 5)
-#define GERROR_MSI_CMDQ_ABT_ERR                (1 << 4)
-#define GERROR_PRIQ_ABT_ERR            (1 << 3)
-#define GERROR_EVTQ_ABT_ERR            (1 << 2)
-#define GERROR_CMDQ_ERR                        (1 << 0)
-#define GERROR_ERR_MASK                        0xfd
-
-#define ARM_SMMU_GERRORN               0x64
-
-#define ARM_SMMU_GERROR_IRQ_CFG0       0x68
-#define ARM_SMMU_GERROR_IRQ_CFG1       0x70
-#define ARM_SMMU_GERROR_IRQ_CFG2       0x74
-
-#define ARM_SMMU_STRTAB_BASE           0x80
-#define STRTAB_BASE_RA                 (1UL << 62)
-#define STRTAB_BASE_ADDR_MASK          GENMASK_ULL(51, 6)
-
-#define ARM_SMMU_STRTAB_BASE_CFG       0x88
-#define STRTAB_BASE_CFG_FMT            GENMASK(17, 16)
-#define STRTAB_BASE_CFG_FMT_LINEAR     0
-#define STRTAB_BASE_CFG_FMT_2LVL       1
-#define STRTAB_BASE_CFG_SPLIT          GENMASK(10, 6)
-#define STRTAB_BASE_CFG_LOG2SIZE       GENMASK(5, 0)
-
-#define ARM_SMMU_CMDQ_BASE             0x90
-#define ARM_SMMU_CMDQ_PROD             0x98
-#define ARM_SMMU_CMDQ_CONS             0x9c
-
-#define ARM_SMMU_EVTQ_BASE             0xa0
-#define ARM_SMMU_EVTQ_PROD             0x100a8
-#define ARM_SMMU_EVTQ_CONS             0x100ac
-#define ARM_SMMU_EVTQ_IRQ_CFG0         0xb0
-#define ARM_SMMU_EVTQ_IRQ_CFG1         0xb8
-#define ARM_SMMU_EVTQ_IRQ_CFG2         0xbc
-
-#define ARM_SMMU_PRIQ_BASE             0xc0
-#define ARM_SMMU_PRIQ_PROD             0x100c8
-#define ARM_SMMU_PRIQ_CONS             0x100cc
-#define ARM_SMMU_PRIQ_IRQ_CFG0         0xd0
-#define ARM_SMMU_PRIQ_IRQ_CFG1         0xd8
-#define ARM_SMMU_PRIQ_IRQ_CFG2         0xdc
-
-#define ARM_SMMU_REG_SZ                        0xe00
-
-/* Common MSI config fields */
-#define MSI_CFG0_ADDR_MASK             GENMASK_ULL(51, 2)
-#define MSI_CFG2_SH                    GENMASK(5, 4)
-#define MSI_CFG2_MEMATTR               GENMASK(3, 0)
-
-/* Common memory attribute values */
-#define ARM_SMMU_SH_NSH                        0
-#define ARM_SMMU_SH_OSH                        2
-#define ARM_SMMU_SH_ISH                        3
-#define ARM_SMMU_MEMATTR_DEVICE_nGnRE  0x1
-#define ARM_SMMU_MEMATTR_OIWB          0xf
-
-#define Q_IDX(llq, p)                  ((p) & ((1 << (llq)->max_n_shift) - 1))
-#define Q_WRP(llq, p)                  ((p) & (1 << (llq)->max_n_shift))
-#define Q_OVERFLOW_FLAG                        (1U << 31)
-#define Q_OVF(p)                       ((p) & Q_OVERFLOW_FLAG)
-#define Q_ENT(q, p)                    ((q)->base +                    \
-                                        Q_IDX(&((q)->llq), p) *        \
-                                        (q)->ent_dwords)
-
-#define Q_BASE_RWA                     (1UL << 62)
-#define Q_BASE_ADDR_MASK               GENMASK_ULL(51, 5)
-#define Q_BASE_LOG2SIZE                        GENMASK(4, 0)
-
-/* Ensure DMA allocations are naturally aligned */
-#ifdef CONFIG_CMA_ALIGNMENT
-#define Q_MAX_SZ_SHIFT                 (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
-#else
-#define Q_MAX_SZ_SHIFT                 (PAGE_SHIFT + MAX_ORDER - 1)
-#endif
-
-/*
- * Stream table.
- *
- * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
- * 2lvl: 128k L1 entries,
- *       256 lazy entries per table (each table covers a PCI bus)
- */
-#define STRTAB_L1_SZ_SHIFT             20
-#define STRTAB_SPLIT                   8
-
-#define STRTAB_L1_DESC_DWORDS          1
-#define STRTAB_L1_DESC_SPAN            GENMASK_ULL(4, 0)
-#define STRTAB_L1_DESC_L2PTR_MASK      GENMASK_ULL(51, 6)
-
-#define STRTAB_STE_DWORDS              8
-#define STRTAB_STE_0_V                 (1UL << 0)
-#define STRTAB_STE_0_CFG               GENMASK_ULL(3, 1)
-#define STRTAB_STE_0_CFG_ABORT         0
-#define STRTAB_STE_0_CFG_BYPASS                4
-#define STRTAB_STE_0_CFG_S1_TRANS      5
-#define STRTAB_STE_0_CFG_S2_TRANS      6
-
-#define STRTAB_STE_0_S1FMT             GENMASK_ULL(5, 4)
-#define STRTAB_STE_0_S1FMT_LINEAR      0
-#define STRTAB_STE_0_S1FMT_64K_L2      2
-#define STRTAB_STE_0_S1CTXPTR_MASK     GENMASK_ULL(51, 6)
-#define STRTAB_STE_0_S1CDMAX           GENMASK_ULL(63, 59)
-
-#define STRTAB_STE_1_S1DSS             GENMASK_ULL(1, 0)
-#define STRTAB_STE_1_S1DSS_TERMINATE   0x0
-#define STRTAB_STE_1_S1DSS_BYPASS      0x1
-#define STRTAB_STE_1_S1DSS_SSID0       0x2
-
-#define STRTAB_STE_1_S1C_CACHE_NC      0UL
-#define STRTAB_STE_1_S1C_CACHE_WBRA    1UL
-#define STRTAB_STE_1_S1C_CACHE_WT      2UL
-#define STRTAB_STE_1_S1C_CACHE_WB      3UL
-#define STRTAB_STE_1_S1CIR             GENMASK_ULL(3, 2)
-#define STRTAB_STE_1_S1COR             GENMASK_ULL(5, 4)
-#define STRTAB_STE_1_S1CSH             GENMASK_ULL(7, 6)
-
-#define STRTAB_STE_1_S1STALLD          (1UL << 27)
-
-#define STRTAB_STE_1_EATS              GENMASK_ULL(29, 28)
-#define STRTAB_STE_1_EATS_ABT          0UL
-#define STRTAB_STE_1_EATS_TRANS                1UL
-#define STRTAB_STE_1_EATS_S1CHK                2UL
-
-#define STRTAB_STE_1_STRW              GENMASK_ULL(31, 30)
-#define STRTAB_STE_1_STRW_NSEL1                0UL
-#define STRTAB_STE_1_STRW_EL2          2UL
-
-#define STRTAB_STE_1_SHCFG             GENMASK_ULL(45, 44)
-#define STRTAB_STE_1_SHCFG_INCOMING    1UL
-
-#define STRTAB_STE_2_S2VMID            GENMASK_ULL(15, 0)
-#define STRTAB_STE_2_VTCR              GENMASK_ULL(50, 32)
-#define STRTAB_STE_2_VTCR_S2T0SZ       GENMASK_ULL(5, 0)
-#define STRTAB_STE_2_VTCR_S2SL0                GENMASK_ULL(7, 6)
-#define STRTAB_STE_2_VTCR_S2IR0                GENMASK_ULL(9, 8)
-#define STRTAB_STE_2_VTCR_S2OR0                GENMASK_ULL(11, 10)
-#define STRTAB_STE_2_VTCR_S2SH0                GENMASK_ULL(13, 12)
-#define STRTAB_STE_2_VTCR_S2TG         GENMASK_ULL(15, 14)
-#define STRTAB_STE_2_VTCR_S2PS         GENMASK_ULL(18, 16)
-#define STRTAB_STE_2_S2AA64            (1UL << 51)
-#define STRTAB_STE_2_S2ENDI            (1UL << 52)
-#define STRTAB_STE_2_S2PTW             (1UL << 54)
-#define STRTAB_STE_2_S2R               (1UL << 58)
-
-#define STRTAB_STE_3_S2TTB_MASK                GENMASK_ULL(51, 4)
-
-/*
- * Context descriptors.
- *
- * Linear: when less than 1024 SSIDs are supported
- * 2lvl: at most 1024 L1 entries,
- *       1024 lazy entries per table.
- */
-#define CTXDESC_SPLIT                  10
-#define CTXDESC_L2_ENTRIES             (1 << CTXDESC_SPLIT)
-
-#define CTXDESC_L1_DESC_DWORDS         1
-#define CTXDESC_L1_DESC_V              (1UL << 0)
-#define CTXDESC_L1_DESC_L2PTR_MASK     GENMASK_ULL(51, 12)
-
-#define CTXDESC_CD_DWORDS              8
-#define CTXDESC_CD_0_TCR_T0SZ          GENMASK_ULL(5, 0)
-#define CTXDESC_CD_0_TCR_TG0           GENMASK_ULL(7, 6)
-#define CTXDESC_CD_0_TCR_IRGN0         GENMASK_ULL(9, 8)
-#define CTXDESC_CD_0_TCR_ORGN0         GENMASK_ULL(11, 10)
-#define CTXDESC_CD_0_TCR_SH0           GENMASK_ULL(13, 12)
-#define CTXDESC_CD_0_TCR_EPD0          (1ULL << 14)
-#define CTXDESC_CD_0_TCR_EPD1          (1ULL << 30)
-
-#define CTXDESC_CD_0_ENDI              (1UL << 15)
-#define CTXDESC_CD_0_V                 (1UL << 31)
-
-#define CTXDESC_CD_0_TCR_IPS           GENMASK_ULL(34, 32)
-#define CTXDESC_CD_0_TCR_TBI0          (1ULL << 38)
-
-#define CTXDESC_CD_0_AA64              (1UL << 41)
-#define CTXDESC_CD_0_S                 (1UL << 44)
-#define CTXDESC_CD_0_R                 (1UL << 45)
-#define CTXDESC_CD_0_A                 (1UL << 46)
-#define CTXDESC_CD_0_ASET              (1UL << 47)
-#define CTXDESC_CD_0_ASID              GENMASK_ULL(63, 48)
-
-#define CTXDESC_CD_1_TTB0_MASK         GENMASK_ULL(51, 4)
-
-/*
- * When the SMMU only supports linear context descriptor tables, pick a
- * reasonable size limit (64kB).
- */
-#define CTXDESC_LINEAR_CDMAX           ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
-
-/* Command queue */
-#define CMDQ_ENT_SZ_SHIFT              4
-#define CMDQ_ENT_DWORDS                        ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
-#define CMDQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
-
-#define CMDQ_CONS_ERR                  GENMASK(30, 24)
-#define CMDQ_ERR_CERROR_NONE_IDX       0
-#define CMDQ_ERR_CERROR_ILL_IDX                1
-#define CMDQ_ERR_CERROR_ABT_IDX                2
-#define CMDQ_ERR_CERROR_ATC_INV_IDX    3
-
-#define CMDQ_PROD_OWNED_FLAG           Q_OVERFLOW_FLAG
-
-/*
- * This is used to size the command queue and therefore must be at least
- * BITS_PER_LONG so that the valid_map works correctly (it relies on the
- * total number of queue entries being a multiple of BITS_PER_LONG).
- */
-#define CMDQ_BATCH_ENTRIES             BITS_PER_LONG
-
-#define CMDQ_0_OP                      GENMASK_ULL(7, 0)
-#define CMDQ_0_SSV                     (1UL << 11)
-
-#define CMDQ_PREFETCH_0_SID            GENMASK_ULL(63, 32)
-#define CMDQ_PREFETCH_1_SIZE           GENMASK_ULL(4, 0)
-#define CMDQ_PREFETCH_1_ADDR_MASK      GENMASK_ULL(63, 12)
-
-#define CMDQ_CFGI_0_SSID               GENMASK_ULL(31, 12)
-#define CMDQ_CFGI_0_SID                        GENMASK_ULL(63, 32)
-#define CMDQ_CFGI_1_LEAF               (1UL << 0)
-#define CMDQ_CFGI_1_RANGE              GENMASK_ULL(4, 0)
-
-#define CMDQ_TLBI_0_NUM                        GENMASK_ULL(16, 12)
-#define CMDQ_TLBI_RANGE_NUM_MAX                31
-#define CMDQ_TLBI_0_SCALE              GENMASK_ULL(24, 20)
-#define CMDQ_TLBI_0_VMID               GENMASK_ULL(47, 32)
-#define CMDQ_TLBI_0_ASID               GENMASK_ULL(63, 48)
-#define CMDQ_TLBI_1_LEAF               (1UL << 0)
-#define CMDQ_TLBI_1_TTL                        GENMASK_ULL(9, 8)
-#define CMDQ_TLBI_1_TG                 GENMASK_ULL(11, 10)
-#define CMDQ_TLBI_1_VA_MASK            GENMASK_ULL(63, 12)
-#define CMDQ_TLBI_1_IPA_MASK           GENMASK_ULL(51, 12)
-
-#define CMDQ_ATC_0_SSID                        GENMASK_ULL(31, 12)
-#define CMDQ_ATC_0_SID                 GENMASK_ULL(63, 32)
-#define CMDQ_ATC_0_GLOBAL              (1UL << 9)
-#define CMDQ_ATC_1_SIZE                        GENMASK_ULL(5, 0)
-#define CMDQ_ATC_1_ADDR_MASK           GENMASK_ULL(63, 12)
-
-#define CMDQ_PRI_0_SSID                        GENMASK_ULL(31, 12)
-#define CMDQ_PRI_0_SID                 GENMASK_ULL(63, 32)
-#define CMDQ_PRI_1_GRPID               GENMASK_ULL(8, 0)
-#define CMDQ_PRI_1_RESP                        GENMASK_ULL(13, 12)
-
-#define CMDQ_SYNC_0_CS                 GENMASK_ULL(13, 12)
-#define CMDQ_SYNC_0_CS_NONE            0
-#define CMDQ_SYNC_0_CS_IRQ             1
-#define CMDQ_SYNC_0_CS_SEV             2
-#define CMDQ_SYNC_0_MSH                        GENMASK_ULL(23, 22)
-#define CMDQ_SYNC_0_MSIATTR            GENMASK_ULL(27, 24)
-#define CMDQ_SYNC_0_MSIDATA            GENMASK_ULL(63, 32)
-#define CMDQ_SYNC_1_MSIADDR_MASK       GENMASK_ULL(51, 2)
-
-/* Event queue */
-#define EVTQ_ENT_SZ_SHIFT              5
-#define EVTQ_ENT_DWORDS                        ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
-#define EVTQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
-
-#define EVTQ_0_ID                      GENMASK_ULL(7, 0)
-
-/* PRI queue */
-#define PRIQ_ENT_SZ_SHIFT              4
-#define PRIQ_ENT_DWORDS                        ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
-#define PRIQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
-
-#define PRIQ_0_SID                     GENMASK_ULL(31, 0)
-#define PRIQ_0_SSID                    GENMASK_ULL(51, 32)
-#define PRIQ_0_PERM_PRIV               (1UL << 58)
-#define PRIQ_0_PERM_EXEC               (1UL << 59)
-#define PRIQ_0_PERM_READ               (1UL << 60)
-#define PRIQ_0_PERM_WRITE              (1UL << 61)
-#define PRIQ_0_PRG_LAST                        (1UL << 62)
-#define PRIQ_0_SSID_V                  (1UL << 63)
-
-#define PRIQ_1_PRG_IDX                 GENMASK_ULL(8, 0)
-#define PRIQ_1_ADDR_MASK               GENMASK_ULL(63, 12)
-
-/* High-level queue structures */
-#define ARM_SMMU_POLL_TIMEOUT_US       1000000 /* 1s! */
-#define ARM_SMMU_POLL_SPIN_COUNT       10
-
-#define MSI_IOVA_BASE                  0x8000000
-#define MSI_IOVA_LENGTH                        0x100000
+#include "arm-smmu-v3.h"
 
 static bool disable_bypass = 1;
 module_param(disable_bypass, bool, 0444);
@@ -423,12 +43,6 @@ module_param(disable_msipolling, bool, 0444);
 MODULE_PARM_DESC(disable_msipolling,
        "Disable MSI-based polling for CMD_SYNC completion.");
 
-enum pri_resp {
-       PRI_RESP_DENY = 0,
-       PRI_RESP_FAIL = 1,
-       PRI_RESP_SUCC = 2,
-};
-
 enum arm_smmu_msi_index {
        EVTQ_MSI_INDEX,
        GERROR_MSI_INDEX,
@@ -454,279 +68,6 @@ static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
        },
 };
 
-struct arm_smmu_cmdq_ent {
-       /* Common fields */
-       u8                              opcode;
-       bool                            substream_valid;
-
-       /* Command-specific fields */
-       union {
-               #define CMDQ_OP_PREFETCH_CFG    0x1
-               struct {
-                       u32                     sid;
-                       u8                      size;
-                       u64                     addr;
-               } prefetch;
-
-               #define CMDQ_OP_CFGI_STE        0x3
-               #define CMDQ_OP_CFGI_ALL        0x4
-               #define CMDQ_OP_CFGI_CD         0x5
-               #define CMDQ_OP_CFGI_CD_ALL     0x6
-               struct {
-                       u32                     sid;
-                       u32                     ssid;
-                       union {
-                               bool            leaf;
-                               u8              span;
-                       };
-               } cfgi;
-
-               #define CMDQ_OP_TLBI_NH_ASID    0x11
-               #define CMDQ_OP_TLBI_NH_VA      0x12
-               #define CMDQ_OP_TLBI_EL2_ALL    0x20
-               #define CMDQ_OP_TLBI_S12_VMALL  0x28
-               #define CMDQ_OP_TLBI_S2_IPA     0x2a
-               #define CMDQ_OP_TLBI_NSNH_ALL   0x30
-               struct {
-                       u8                      num;
-                       u8                      scale;
-                       u16                     asid;
-                       u16                     vmid;
-                       bool                    leaf;
-                       u8                      ttl;
-                       u8                      tg;
-                       u64                     addr;
-               } tlbi;
-
-               #define CMDQ_OP_ATC_INV         0x40
-               #define ATC_INV_SIZE_ALL        52
-               struct {
-                       u32                     sid;
-                       u32                     ssid;
-                       u64                     addr;
-                       u8                      size;
-                       bool                    global;
-               } atc;
-
-               #define CMDQ_OP_PRI_RESP        0x41
-               struct {
-                       u32                     sid;
-                       u32                     ssid;
-                       u16                     grpid;
-                       enum pri_resp           resp;
-               } pri;
-
-               #define CMDQ_OP_CMD_SYNC        0x46
-               struct {
-                       u64                     msiaddr;
-               } sync;
-       };
-};
-
-struct arm_smmu_ll_queue {
-       union {
-               u64                     val;
-               struct {
-                       u32             prod;
-                       u32             cons;
-               };
-               struct {
-                       atomic_t        prod;
-                       atomic_t        cons;
-               } atomic;
-               u8                      __pad[SMP_CACHE_BYTES];
-       } ____cacheline_aligned_in_smp;
-       u32                             max_n_shift;
-};
-
-struct arm_smmu_queue {
-       struct arm_smmu_ll_queue        llq;
-       int                             irq; /* Wired interrupt */
-
-       __le64                          *base;
-       dma_addr_t                      base_dma;
-       u64                             q_base;
-
-       size_t                          ent_dwords;
-
-       u32 __iomem                     *prod_reg;
-       u32 __iomem                     *cons_reg;
-};
-
-struct arm_smmu_queue_poll {
-       ktime_t                         timeout;
-       unsigned int                    delay;
-       unsigned int                    spin_cnt;
-       bool                            wfe;
-};
-
-struct arm_smmu_cmdq {
-       struct arm_smmu_queue           q;
-       atomic_long_t                   *valid_map;
-       atomic_t                        owner_prod;
-       atomic_t                        lock;
-};
-
-struct arm_smmu_cmdq_batch {
-       u64                             cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
-       int                             num;
-};
-
-struct arm_smmu_evtq {
-       struct arm_smmu_queue           q;
-       u32                             max_stalls;
-};
-
-struct arm_smmu_priq {
-       struct arm_smmu_queue           q;
-};
-
-/* High-level stream table and context descriptor structures */
-struct arm_smmu_strtab_l1_desc {
-       u8                              span;
-
-       __le64                          *l2ptr;
-       dma_addr_t                      l2ptr_dma;
-};
-
-struct arm_smmu_ctx_desc {
-       u16                             asid;
-       u64                             ttbr;
-       u64                             tcr;
-       u64                             mair;
-};
-
-struct arm_smmu_l1_ctx_desc {
-       __le64                          *l2ptr;
-       dma_addr_t                      l2ptr_dma;
-};
-
-struct arm_smmu_ctx_desc_cfg {
-       __le64                          *cdtab;
-       dma_addr_t                      cdtab_dma;
-       struct arm_smmu_l1_ctx_desc     *l1_desc;
-       unsigned int                    num_l1_ents;
-};
-
-struct arm_smmu_s1_cfg {
-       struct arm_smmu_ctx_desc_cfg    cdcfg;
-       struct arm_smmu_ctx_desc        cd;
-       u8                              s1fmt;
-       u8                              s1cdmax;
-};
-
-struct arm_smmu_s2_cfg {
-       u16                             vmid;
-       u64                             vttbr;
-       u64                             vtcr;
-};
-
-struct arm_smmu_strtab_cfg {
-       __le64                          *strtab;
-       dma_addr_t                      strtab_dma;
-       struct arm_smmu_strtab_l1_desc  *l1_desc;
-       unsigned int                    num_l1_ents;
-
-       u64                             strtab_base;
-       u32                             strtab_base_cfg;
-};
-
-/* An SMMUv3 instance */
-struct arm_smmu_device {
-       struct device                   *dev;
-       void __iomem                    *base;
-       void __iomem                    *page1;
-
-#define ARM_SMMU_FEAT_2_LVL_STRTAB     (1 << 0)
-#define ARM_SMMU_FEAT_2_LVL_CDTAB      (1 << 1)
-#define ARM_SMMU_FEAT_TT_LE            (1 << 2)
-#define ARM_SMMU_FEAT_TT_BE            (1 << 3)
-#define ARM_SMMU_FEAT_PRI              (1 << 4)
-#define ARM_SMMU_FEAT_ATS              (1 << 5)
-#define ARM_SMMU_FEAT_SEV              (1 << 6)
-#define ARM_SMMU_FEAT_MSI              (1 << 7)
-#define ARM_SMMU_FEAT_COHERENCY                (1 << 8)
-#define ARM_SMMU_FEAT_TRANS_S1         (1 << 9)
-#define ARM_SMMU_FEAT_TRANS_S2         (1 << 10)
-#define ARM_SMMU_FEAT_STALLS           (1 << 11)
-#define ARM_SMMU_FEAT_HYP              (1 << 12)
-#define ARM_SMMU_FEAT_STALL_FORCE      (1 << 13)
-#define ARM_SMMU_FEAT_VAX              (1 << 14)
-#define ARM_SMMU_FEAT_RANGE_INV                (1 << 15)
-       u32                             features;
-
-#define ARM_SMMU_OPT_SKIP_PREFETCH     (1 << 0)
-#define ARM_SMMU_OPT_PAGE0_REGS_ONLY   (1 << 1)
-#define ARM_SMMU_OPT_MSIPOLL           (1 << 2)
-       u32                             options;
-
-       struct arm_smmu_cmdq            cmdq;
-       struct arm_smmu_evtq            evtq;
-       struct arm_smmu_priq            priq;
-
-       int                             gerr_irq;
-       int                             combined_irq;
-
-       unsigned long                   ias; /* IPA */
-       unsigned long                   oas; /* PA */
-       unsigned long                   pgsize_bitmap;
-
-#define ARM_SMMU_MAX_ASIDS             (1 << 16)
-       unsigned int                    asid_bits;
-
-#define ARM_SMMU_MAX_VMIDS             (1 << 16)
-       unsigned int                    vmid_bits;
-       DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
-
-       unsigned int                    ssid_bits;
-       unsigned int                    sid_bits;
-
-       struct arm_smmu_strtab_cfg      strtab_cfg;
-
-       /* IOMMU core code handle */
-       struct iommu_device             iommu;
-};
-
-/* SMMU private data for each master */
-struct arm_smmu_master {
-       struct arm_smmu_device          *smmu;
-       struct device                   *dev;
-       struct arm_smmu_domain          *domain;
-       struct list_head                domain_head;
-       u32                             *sids;
-       unsigned int                    num_sids;
-       bool                            ats_enabled;
-       unsigned int                    ssid_bits;
-};
-
-/* SMMU private data for an IOMMU domain */
-enum arm_smmu_domain_stage {
-       ARM_SMMU_DOMAIN_S1 = 0,
-       ARM_SMMU_DOMAIN_S2,
-       ARM_SMMU_DOMAIN_NESTED,
-       ARM_SMMU_DOMAIN_BYPASS,
-};
-
-struct arm_smmu_domain {
-       struct arm_smmu_device          *smmu;
-       struct mutex                    init_mutex; /* Protects smmu pointer */
-
-       struct io_pgtable_ops           *pgtbl_ops;
-       bool                            non_strict;
-       atomic_t                        nr_ats_masters;
-
-       enum arm_smmu_domain_stage      stage;
-       union {
-               struct arm_smmu_s1_cfg  s1_cfg;
-               struct arm_smmu_s2_cfg  s2_cfg;
-       };
-
-       struct iommu_domain             domain;
-
-       struct list_head                devices;
-       spinlock_t                      devices_lock;
-};
-
 struct arm_smmu_option_prop {
        u32 opt;
        const char *prop;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
new file mode 100644 (file)
index 0000000..c4c0c13
--- /dev/null
@@ -0,0 +1,676 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * IOMMU API for ARM architected SMMUv3 implementations.
+ *
+ * Copyright (C) 2015 ARM Limited
+ */
+
+#ifndef _ARM_SMMU_V3_H
+#define _ARM_SMMU_V3_H
+
+#include <linux/bitfield.h>
+#include <linux/iommu.h>
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/sizes.h>
+
+/* MMIO registers */
+#define ARM_SMMU_IDR0                  0x0
+#define IDR0_ST_LVL                    GENMASK(28, 27)
+#define IDR0_ST_LVL_2LVL               1
+#define IDR0_STALL_MODEL               GENMASK(25, 24)
+#define IDR0_STALL_MODEL_STALL         0
+#define IDR0_STALL_MODEL_FORCE         2
+#define IDR0_TTENDIAN                  GENMASK(22, 21)
+#define IDR0_TTENDIAN_MIXED            0
+#define IDR0_TTENDIAN_LE               2
+#define IDR0_TTENDIAN_BE               3
+#define IDR0_CD2L                      (1 << 19)
+#define IDR0_VMID16                    (1 << 18)
+#define IDR0_PRI                       (1 << 16)
+#define IDR0_SEV                       (1 << 14)
+#define IDR0_MSI                       (1 << 13)
+#define IDR0_ASID16                    (1 << 12)
+#define IDR0_ATS                       (1 << 10)
+#define IDR0_HYP                       (1 << 9)
+#define IDR0_COHACC                    (1 << 4)
+#define IDR0_TTF                       GENMASK(3, 2)
+#define IDR0_TTF_AARCH64               2
+#define IDR0_TTF_AARCH32_64            3
+#define IDR0_S1P                       (1 << 1)
+#define IDR0_S2P                       (1 << 0)
+
+#define ARM_SMMU_IDR1                  0x4
+#define IDR1_TABLES_PRESET             (1 << 30)
+#define IDR1_QUEUES_PRESET             (1 << 29)
+#define IDR1_REL                       (1 << 28)
+#define IDR1_CMDQS                     GENMASK(25, 21)
+#define IDR1_EVTQS                     GENMASK(20, 16)
+#define IDR1_PRIQS                     GENMASK(15, 11)
+#define IDR1_SSIDSIZE                  GENMASK(10, 6)
+#define IDR1_SIDSIZE                   GENMASK(5, 0)
+
+#define ARM_SMMU_IDR3                  0xc
+#define IDR3_RIL                       (1 << 10)
+
+#define ARM_SMMU_IDR5                  0x14
+#define IDR5_STALL_MAX                 GENMASK(31, 16)
+#define IDR5_GRAN64K                   (1 << 6)
+#define IDR5_GRAN16K                   (1 << 5)
+#define IDR5_GRAN4K                    (1 << 4)
+#define IDR5_OAS                       GENMASK(2, 0)
+#define IDR5_OAS_32_BIT                        0
+#define IDR5_OAS_36_BIT                        1
+#define IDR5_OAS_40_BIT                        2
+#define IDR5_OAS_42_BIT                        3
+#define IDR5_OAS_44_BIT                        4
+#define IDR5_OAS_48_BIT                        5
+#define IDR5_OAS_52_BIT                        6
+#define IDR5_VAX                       GENMASK(11, 10)
+#define IDR5_VAX_52_BIT                        1
+
+#define ARM_SMMU_CR0                   0x20
+#define CR0_ATSCHK                     (1 << 4)
+#define CR0_CMDQEN                     (1 << 3)
+#define CR0_EVTQEN                     (1 << 2)
+#define CR0_PRIQEN                     (1 << 1)
+#define CR0_SMMUEN                     (1 << 0)
+
+#define ARM_SMMU_CR0ACK                        0x24
+
+#define ARM_SMMU_CR1                   0x28
+#define CR1_TABLE_SH                   GENMASK(11, 10)
+#define CR1_TABLE_OC                   GENMASK(9, 8)
+#define CR1_TABLE_IC                   GENMASK(7, 6)
+#define CR1_QUEUE_SH                   GENMASK(5, 4)
+#define CR1_QUEUE_OC                   GENMASK(3, 2)
+#define CR1_QUEUE_IC                   GENMASK(1, 0)
+/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
+#define CR1_CACHE_NC                   0
+#define CR1_CACHE_WB                   1
+#define CR1_CACHE_WT                   2
+
+#define ARM_SMMU_CR2                   0x2c
+#define CR2_PTM                                (1 << 2)
+#define CR2_RECINVSID                  (1 << 1)
+#define CR2_E2H                                (1 << 0)
+
+#define ARM_SMMU_GBPA                  0x44
+#define GBPA_UPDATE                    (1 << 31)
+#define GBPA_ABORT                     (1 << 20)
+
+#define ARM_SMMU_IRQ_CTRL              0x50
+#define IRQ_CTRL_EVTQ_IRQEN            (1 << 2)
+#define IRQ_CTRL_PRIQ_IRQEN            (1 << 1)
+#define IRQ_CTRL_GERROR_IRQEN          (1 << 0)
+
+#define ARM_SMMU_IRQ_CTRLACK           0x54
+
+#define ARM_SMMU_GERROR                        0x60
+#define GERROR_SFM_ERR                 (1 << 8)
+#define GERROR_MSI_GERROR_ABT_ERR      (1 << 7)
+#define GERROR_MSI_PRIQ_ABT_ERR                (1 << 6)
+#define GERROR_MSI_EVTQ_ABT_ERR                (1 << 5)
+#define GERROR_MSI_CMDQ_ABT_ERR                (1 << 4)
+#define GERROR_PRIQ_ABT_ERR            (1 << 3)
+#define GERROR_EVTQ_ABT_ERR            (1 << 2)
+#define GERROR_CMDQ_ERR                        (1 << 0)
+#define GERROR_ERR_MASK                        0xfd
+
+#define ARM_SMMU_GERRORN               0x64
+
+#define ARM_SMMU_GERROR_IRQ_CFG0       0x68
+#define ARM_SMMU_GERROR_IRQ_CFG1       0x70
+#define ARM_SMMU_GERROR_IRQ_CFG2       0x74
+
+#define ARM_SMMU_STRTAB_BASE           0x80
+#define STRTAB_BASE_RA                 (1UL << 62)
+#define STRTAB_BASE_ADDR_MASK          GENMASK_ULL(51, 6)
+
+#define ARM_SMMU_STRTAB_BASE_CFG       0x88
+#define STRTAB_BASE_CFG_FMT            GENMASK(17, 16)
+#define STRTAB_BASE_CFG_FMT_LINEAR     0
+#define STRTAB_BASE_CFG_FMT_2LVL       1
+#define STRTAB_BASE_CFG_SPLIT          GENMASK(10, 6)
+#define STRTAB_BASE_CFG_LOG2SIZE       GENMASK(5, 0)
+
+#define ARM_SMMU_CMDQ_BASE             0x90
+#define ARM_SMMU_CMDQ_PROD             0x98
+#define ARM_SMMU_CMDQ_CONS             0x9c
+
+#define ARM_SMMU_EVTQ_BASE             0xa0
+#define ARM_SMMU_EVTQ_PROD             0x100a8
+#define ARM_SMMU_EVTQ_CONS             0x100ac
+#define ARM_SMMU_EVTQ_IRQ_CFG0         0xb0
+#define ARM_SMMU_EVTQ_IRQ_CFG1         0xb8
+#define ARM_SMMU_EVTQ_IRQ_CFG2         0xbc
+
+#define ARM_SMMU_PRIQ_BASE             0xc0
+#define ARM_SMMU_PRIQ_PROD             0x100c8
+#define ARM_SMMU_PRIQ_CONS             0x100cc
+#define ARM_SMMU_PRIQ_IRQ_CFG0         0xd0
+#define ARM_SMMU_PRIQ_IRQ_CFG1         0xd8
+#define ARM_SMMU_PRIQ_IRQ_CFG2         0xdc
+
+#define ARM_SMMU_REG_SZ                        0xe00
+
+/* Common MSI config fields */
+#define MSI_CFG0_ADDR_MASK             GENMASK_ULL(51, 2)
+#define MSI_CFG2_SH                    GENMASK(5, 4)
+#define MSI_CFG2_MEMATTR               GENMASK(3, 0)
+
+/* Common memory attribute values */
+#define ARM_SMMU_SH_NSH                        0
+#define ARM_SMMU_SH_OSH                        2
+#define ARM_SMMU_SH_ISH                        3
+#define ARM_SMMU_MEMATTR_DEVICE_nGnRE  0x1
+#define ARM_SMMU_MEMATTR_OIWB          0xf
+
+#define Q_IDX(llq, p)                  ((p) & ((1 << (llq)->max_n_shift) - 1))
+#define Q_WRP(llq, p)                  ((p) & (1 << (llq)->max_n_shift))
+#define Q_OVERFLOW_FLAG                        (1U << 31)
+#define Q_OVF(p)                       ((p) & Q_OVERFLOW_FLAG)
+#define Q_ENT(q, p)                    ((q)->base +                    \
+                                        Q_IDX(&((q)->llq), p) *        \
+                                        (q)->ent_dwords)
+
+#define Q_BASE_RWA                     (1UL << 62)
+#define Q_BASE_ADDR_MASK               GENMASK_ULL(51, 5)
+#define Q_BASE_LOG2SIZE                        GENMASK(4, 0)
+
+/* Ensure DMA allocations are naturally aligned */
+#ifdef CONFIG_CMA_ALIGNMENT
+#define Q_MAX_SZ_SHIFT                 (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
+#else
+#define Q_MAX_SZ_SHIFT                 (PAGE_SHIFT + MAX_ORDER - 1)
+#endif
+
+/*
+ * Stream table.
+ *
+ * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
+ * 2lvl: 128k L1 entries,
+ *       256 lazy entries per table (each table covers a PCI bus)
+ */
+#define STRTAB_L1_SZ_SHIFT             20
+#define STRTAB_SPLIT                   8
+
+#define STRTAB_L1_DESC_DWORDS          1
+#define STRTAB_L1_DESC_SPAN            GENMASK_ULL(4, 0)
+#define STRTAB_L1_DESC_L2PTR_MASK      GENMASK_ULL(51, 6)
+
+#define STRTAB_STE_DWORDS              8
+#define STRTAB_STE_0_V                 (1UL << 0)
+#define STRTAB_STE_0_CFG               GENMASK_ULL(3, 1)
+#define STRTAB_STE_0_CFG_ABORT         0
+#define STRTAB_STE_0_CFG_BYPASS                4
+#define STRTAB_STE_0_CFG_S1_TRANS      5
+#define STRTAB_STE_0_CFG_S2_TRANS      6
+
+#define STRTAB_STE_0_S1FMT             GENMASK_ULL(5, 4)
+#define STRTAB_STE_0_S1FMT_LINEAR      0
+#define STRTAB_STE_0_S1FMT_64K_L2      2
+#define STRTAB_STE_0_S1CTXPTR_MASK     GENMASK_ULL(51, 6)
+#define STRTAB_STE_0_S1CDMAX           GENMASK_ULL(63, 59)
+
+#define STRTAB_STE_1_S1DSS             GENMASK_ULL(1, 0)
+#define STRTAB_STE_1_S1DSS_TERMINATE   0x0
+#define STRTAB_STE_1_S1DSS_BYPASS      0x1
+#define STRTAB_STE_1_S1DSS_SSID0       0x2
+
+#define STRTAB_STE_1_S1C_CACHE_NC      0UL
+#define STRTAB_STE_1_S1C_CACHE_WBRA    1UL
+#define STRTAB_STE_1_S1C_CACHE_WT      2UL
+#define STRTAB_STE_1_S1C_CACHE_WB      3UL
+#define STRTAB_STE_1_S1CIR             GENMASK_ULL(3, 2)
+#define STRTAB_STE_1_S1COR             GENMASK_ULL(5, 4)
+#define STRTAB_STE_1_S1CSH             GENMASK_ULL(7, 6)
+
+#define STRTAB_STE_1_S1STALLD          (1UL << 27)
+
+#define STRTAB_STE_1_EATS              GENMASK_ULL(29, 28)
+#define STRTAB_STE_1_EATS_ABT          0UL
+#define STRTAB_STE_1_EATS_TRANS                1UL
+#define STRTAB_STE_1_EATS_S1CHK                2UL
+
+#define STRTAB_STE_1_STRW              GENMASK_ULL(31, 30)
+#define STRTAB_STE_1_STRW_NSEL1                0UL
+#define STRTAB_STE_1_STRW_EL2          2UL
+
+#define STRTAB_STE_1_SHCFG             GENMASK_ULL(45, 44)
+#define STRTAB_STE_1_SHCFG_INCOMING    1UL
+
+#define STRTAB_STE_2_S2VMID            GENMASK_ULL(15, 0)
+#define STRTAB_STE_2_VTCR              GENMASK_ULL(50, 32)
+#define STRTAB_STE_2_VTCR_S2T0SZ       GENMASK_ULL(5, 0)
+#define STRTAB_STE_2_VTCR_S2SL0                GENMASK_ULL(7, 6)
+#define STRTAB_STE_2_VTCR_S2IR0                GENMASK_ULL(9, 8)
+#define STRTAB_STE_2_VTCR_S2OR0                GENMASK_ULL(11, 10)
+#define STRTAB_STE_2_VTCR_S2SH0                GENMASK_ULL(13, 12)
+#define STRTAB_STE_2_VTCR_S2TG         GENMASK_ULL(15, 14)
+#define STRTAB_STE_2_VTCR_S2PS         GENMASK_ULL(18, 16)
+#define STRTAB_STE_2_S2AA64            (1UL << 51)
+#define STRTAB_STE_2_S2ENDI            (1UL << 52)
+#define STRTAB_STE_2_S2PTW             (1UL << 54)
+#define STRTAB_STE_2_S2R               (1UL << 58)
+
+#define STRTAB_STE_3_S2TTB_MASK                GENMASK_ULL(51, 4)
+
+/*
+ * Context descriptors.
+ *
+ * Linear: when fewer than 1024 SSIDs are supported
+ * 2lvl: at most 1024 L1 entries,
+ *       1024 lazy entries per table.
+ */
+#define CTXDESC_SPLIT                  10
+#define CTXDESC_L2_ENTRIES             (1 << CTXDESC_SPLIT)
+
+#define CTXDESC_L1_DESC_DWORDS         1
+#define CTXDESC_L1_DESC_V              (1UL << 0)
+#define CTXDESC_L1_DESC_L2PTR_MASK     GENMASK_ULL(51, 12)
+
+#define CTXDESC_CD_DWORDS              8
+#define CTXDESC_CD_0_TCR_T0SZ          GENMASK_ULL(5, 0)
+#define CTXDESC_CD_0_TCR_TG0           GENMASK_ULL(7, 6)
+#define CTXDESC_CD_0_TCR_IRGN0         GENMASK_ULL(9, 8)
+#define CTXDESC_CD_0_TCR_ORGN0         GENMASK_ULL(11, 10)
+#define CTXDESC_CD_0_TCR_SH0           GENMASK_ULL(13, 12)
+#define CTXDESC_CD_0_TCR_EPD0          (1ULL << 14)
+#define CTXDESC_CD_0_TCR_EPD1          (1ULL << 30)
+
+#define CTXDESC_CD_0_ENDI              (1UL << 15)
+#define CTXDESC_CD_0_V                 (1UL << 31)
+
+#define CTXDESC_CD_0_TCR_IPS           GENMASK_ULL(34, 32)
+#define CTXDESC_CD_0_TCR_TBI0          (1ULL << 38)
+
+#define CTXDESC_CD_0_AA64              (1UL << 41)
+#define CTXDESC_CD_0_S                 (1UL << 44)
+#define CTXDESC_CD_0_R                 (1UL << 45)
+#define CTXDESC_CD_0_A                 (1UL << 46)
+#define CTXDESC_CD_0_ASET              (1UL << 47)
+#define CTXDESC_CD_0_ASID              GENMASK_ULL(63, 48)
+
+#define CTXDESC_CD_1_TTB0_MASK         GENMASK_ULL(51, 4)
+
+/*
+ * When the SMMU only supports linear context descriptor tables, pick a
+ * reasonable size limit (64kB).
+ */
+#define CTXDESC_LINEAR_CDMAX           ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
+
+/* Command queue */
+#define CMDQ_ENT_SZ_SHIFT              4
+#define CMDQ_ENT_DWORDS                        ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
+#define CMDQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
+
+#define CMDQ_CONS_ERR                  GENMASK(30, 24)
+#define CMDQ_ERR_CERROR_NONE_IDX       0
+#define CMDQ_ERR_CERROR_ILL_IDX                1
+#define CMDQ_ERR_CERROR_ABT_IDX                2
+#define CMDQ_ERR_CERROR_ATC_INV_IDX    3
+
+#define CMDQ_PROD_OWNED_FLAG           Q_OVERFLOW_FLAG
+
+/*
+ * This is used to size the command queue and therefore must be at least
+ * BITS_PER_LONG so that the valid_map works correctly (it relies on the
+ * total number of queue entries being a multiple of BITS_PER_LONG).
+ */
+#define CMDQ_BATCH_ENTRIES             BITS_PER_LONG
+
+#define CMDQ_0_OP                      GENMASK_ULL(7, 0)
+#define CMDQ_0_SSV                     (1UL << 11)
+
+#define CMDQ_PREFETCH_0_SID            GENMASK_ULL(63, 32)
+#define CMDQ_PREFETCH_1_SIZE           GENMASK_ULL(4, 0)
+#define CMDQ_PREFETCH_1_ADDR_MASK      GENMASK_ULL(63, 12)
+
+#define CMDQ_CFGI_0_SSID               GENMASK_ULL(31, 12)
+#define CMDQ_CFGI_0_SID                        GENMASK_ULL(63, 32)
+#define CMDQ_CFGI_1_LEAF               (1UL << 0)
+#define CMDQ_CFGI_1_RANGE              GENMASK_ULL(4, 0)
+
+#define CMDQ_TLBI_0_NUM                        GENMASK_ULL(16, 12)
+#define CMDQ_TLBI_RANGE_NUM_MAX                31
+#define CMDQ_TLBI_0_SCALE              GENMASK_ULL(24, 20)
+#define CMDQ_TLBI_0_VMID               GENMASK_ULL(47, 32)
+#define CMDQ_TLBI_0_ASID               GENMASK_ULL(63, 48)
+#define CMDQ_TLBI_1_LEAF               (1UL << 0)
+#define CMDQ_TLBI_1_TTL                        GENMASK_ULL(9, 8)
+#define CMDQ_TLBI_1_TG                 GENMASK_ULL(11, 10)
+#define CMDQ_TLBI_1_VA_MASK            GENMASK_ULL(63, 12)
+#define CMDQ_TLBI_1_IPA_MASK           GENMASK_ULL(51, 12)
+
+#define CMDQ_ATC_0_SSID                        GENMASK_ULL(31, 12)
+#define CMDQ_ATC_0_SID                 GENMASK_ULL(63, 32)
+#define CMDQ_ATC_0_GLOBAL              (1UL << 9)
+#define CMDQ_ATC_1_SIZE                        GENMASK_ULL(5, 0)
+#define CMDQ_ATC_1_ADDR_MASK           GENMASK_ULL(63, 12)
+
+#define CMDQ_PRI_0_SSID                        GENMASK_ULL(31, 12)
+#define CMDQ_PRI_0_SID                 GENMASK_ULL(63, 32)
+#define CMDQ_PRI_1_GRPID               GENMASK_ULL(8, 0)
+#define CMDQ_PRI_1_RESP                        GENMASK_ULL(13, 12)
+
+#define CMDQ_SYNC_0_CS                 GENMASK_ULL(13, 12)
+#define CMDQ_SYNC_0_CS_NONE            0
+#define CMDQ_SYNC_0_CS_IRQ             1
+#define CMDQ_SYNC_0_CS_SEV             2
+#define CMDQ_SYNC_0_MSH                        GENMASK_ULL(23, 22)
+#define CMDQ_SYNC_0_MSIATTR            GENMASK_ULL(27, 24)
+#define CMDQ_SYNC_0_MSIDATA            GENMASK_ULL(63, 32)
+#define CMDQ_SYNC_1_MSIADDR_MASK       GENMASK_ULL(51, 2)
+
+/* Event queue */
+#define EVTQ_ENT_SZ_SHIFT              5
+#define EVTQ_ENT_DWORDS                        ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
+#define EVTQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
+
+#define EVTQ_0_ID                      GENMASK_ULL(7, 0)
+
+/* PRI queue */
+#define PRIQ_ENT_SZ_SHIFT              4
+#define PRIQ_ENT_DWORDS                        ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
+#define PRIQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
+
+#define PRIQ_0_SID                     GENMASK_ULL(31, 0)
+#define PRIQ_0_SSID                    GENMASK_ULL(51, 32)
+#define PRIQ_0_PERM_PRIV               (1UL << 58)
+#define PRIQ_0_PERM_EXEC               (1UL << 59)
+#define PRIQ_0_PERM_READ               (1UL << 60)
+#define PRIQ_0_PERM_WRITE              (1UL << 61)
+#define PRIQ_0_PRG_LAST                        (1UL << 62)
+#define PRIQ_0_SSID_V                  (1UL << 63)
+
+#define PRIQ_1_PRG_IDX                 GENMASK_ULL(8, 0)
+#define PRIQ_1_ADDR_MASK               GENMASK_ULL(63, 12)
+
+/* High-level queue structures */
+#define ARM_SMMU_POLL_TIMEOUT_US       1000000 /* 1s! */
+#define ARM_SMMU_POLL_SPIN_COUNT       10
+
+#define MSI_IOVA_BASE                  0x8000000
+#define MSI_IOVA_LENGTH                        0x100000
+
+enum pri_resp {        /* type of the pri.resp member of struct arm_smmu_cmdq_ent */
+       PRI_RESP_DENY = 0,
+       PRI_RESP_FAIL = 1,
+       PRI_RESP_SUCC = 2,
+};
+
+struct arm_smmu_cmdq_ent {
+       /* Fields present for every command */
+       u8                              opcode; /* NOTE(review): presumably one of the CMDQ_OP_* values below - confirm */
+       bool                            substream_valid;
+
+       /* Command-specific fields; each CMDQ_OP_* group precedes the member it is listed with */
+       union {
+               #define CMDQ_OP_PREFETCH_CFG    0x1
+               struct {
+                       u32                     sid;
+                       u8                      size;
+                       u64                     addr;
+               } prefetch;
+
+               #define CMDQ_OP_CFGI_STE        0x3
+               #define CMDQ_OP_CFGI_ALL        0x4
+               #define CMDQ_OP_CFGI_CD         0x5
+               #define CMDQ_OP_CFGI_CD_ALL     0x6
+               struct {
+                       u32                     sid;
+                       u32                     ssid;
+                       union { /* leaf and span overlay each other */
+                               bool            leaf;
+                               u8              span;
+                       };
+               } cfgi;
+
+               #define CMDQ_OP_TLBI_NH_ASID    0x11
+               #define CMDQ_OP_TLBI_NH_VA      0x12
+               #define CMDQ_OP_TLBI_EL2_ALL    0x20
+               #define CMDQ_OP_TLBI_S12_VMALL  0x28
+               #define CMDQ_OP_TLBI_S2_IPA     0x2a
+               #define CMDQ_OP_TLBI_NSNH_ALL   0x30
+               struct {
+                       u8                      num;
+                       u8                      scale;
+                       u16                     asid;
+                       u16                     vmid;
+                       bool                    leaf;
+                       u8                      ttl;
+                       u8                      tg;
+                       u64                     addr;
+               } tlbi;
+
+               #define CMDQ_OP_ATC_INV         0x40
+               #define ATC_INV_SIZE_ALL        52
+               struct {
+                       u32                     sid;
+                       u32                     ssid;
+                       u64                     addr;
+                       u8                      size;
+                       bool                    global;
+               } atc;
+
+               #define CMDQ_OP_PRI_RESP        0x41
+               struct {
+                       u32                     sid;
+                       u32                     ssid;
+                       u16                     grpid;
+                       enum pri_resp           resp; /* a PRI_RESP_* value */
+               } pri;
+
+               #define CMDQ_OP_CMD_SYNC        0x46
+               struct {
+                       u64                     msiaddr;
+               } sync;
+       };
+};
+
+struct arm_smmu_ll_queue {
+       union { /* several views of the same prod/cons storage */
+               u64                     val; /* prod and cons as one 64-bit word */
+               struct {
+                       u32             prod;
+                       u32             cons;
+               };
+               struct {
+                       atomic_t        prod;
+                       atomic_t        cons;
+               } atomic; /* atomic_t view of prod and cons */
+               u8                      __pad[SMP_CACHE_BYTES]; /* makes the union at least SMP_CACHE_BYTES big */
+       } ____cacheline_aligned_in_smp;
+       u32                             max_n_shift; /* Q_IDX() masks with (1 << max_n_shift) - 1; Q_WRP() tests bit max_n_shift */
+};
+
+struct arm_smmu_queue {
+       struct arm_smmu_ll_queue        llq; /* prod/cons state; Q_ENT() reads its max_n_shift */
+       int                             irq; /* Wired interrupt */
+
+       __le64                          *base; /* first entry; Q_ENT() offsets from here */
+       dma_addr_t                      base_dma; /* NOTE(review): presumably the DMA address of base - confirm */
+       u64                             q_base;
+
+       size_t                          ent_dwords; /* entry stride used by Q_ENT(), in __le64 units */
+
+       u32 __iomem                     *prod_reg;
+       u32 __iomem                     *cons_reg;
+};
+
+struct arm_smmu_queue_poll {
+       ktime_t                         timeout;
+       unsigned int                    delay;
+       unsigned int                    spin_cnt;
+       bool                            wfe;
+};
+
+struct arm_smmu_cmdq {
+       struct arm_smmu_queue           q;
+       atomic_long_t                   *valid_map;
+       atomic_t                        owner_prod;
+       atomic_t                        lock;
+};
+
+struct arm_smmu_cmdq_batch {
+       u64                             cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; /* room for CMDQ_BATCH_ENTRIES commands of CMDQ_ENT_DWORDS u64s each */
+       int                             num; /* NOTE(review): presumably the number of commands currently held in cmds - confirm */
+};
+
+struct arm_smmu_evtq {
+       struct arm_smmu_queue           q;
+       u32                             max_stalls;
+};
+
+struct arm_smmu_priq {
+       struct arm_smmu_queue           q;
+};
+
+/* High-level stream table and context descriptor structures */
+struct arm_smmu_strtab_l1_desc {
+       u8                              span;
+
+       __le64                          *l2ptr;
+       dma_addr_t                      l2ptr_dma;
+};
+
+struct arm_smmu_ctx_desc {
+       u16                             asid;
+       u64                             ttbr;
+       u64                             tcr;
+       u64                             mair;
+};
+
+struct arm_smmu_l1_ctx_desc {
+       __le64                          *l2ptr;
+       dma_addr_t                      l2ptr_dma;
+};
+
+struct arm_smmu_ctx_desc_cfg {
+       __le64                          *cdtab;
+       dma_addr_t                      cdtab_dma;
+       struct arm_smmu_l1_ctx_desc     *l1_desc;
+       unsigned int                    num_l1_ents;
+};
+
+struct arm_smmu_s1_cfg {
+       struct arm_smmu_ctx_desc_cfg    cdcfg;
+       struct arm_smmu_ctx_desc        cd;
+       u8                              s1fmt;
+       u8                              s1cdmax;
+};
+
+struct arm_smmu_s2_cfg {
+       u16                             vmid;
+       u64                             vttbr;
+       u64                             vtcr;
+};
+
+struct arm_smmu_strtab_cfg {
+       __le64                          *strtab;
+       dma_addr_t                      strtab_dma;
+       struct arm_smmu_strtab_l1_desc  *l1_desc;
+       unsigned int                    num_l1_ents;
+
+       u64                             strtab_base;
+       u32                             strtab_base_cfg;
+};
+
+/* State kept for one SMMUv3 instance */
+struct arm_smmu_device {
+       struct device                   *dev;
+       void __iomem                    *base;
+       void __iomem                    *page1;
+
+#define ARM_SMMU_FEAT_2_LVL_STRTAB     (1 << 0)
+#define ARM_SMMU_FEAT_2_LVL_CDTAB      (1 << 1)
+#define ARM_SMMU_FEAT_TT_LE            (1 << 2)
+#define ARM_SMMU_FEAT_TT_BE            (1 << 3)
+#define ARM_SMMU_FEAT_PRI              (1 << 4)
+#define ARM_SMMU_FEAT_ATS              (1 << 5)
+#define ARM_SMMU_FEAT_SEV              (1 << 6)
+#define ARM_SMMU_FEAT_MSI              (1 << 7)
+#define ARM_SMMU_FEAT_COHERENCY                (1 << 8)
+#define ARM_SMMU_FEAT_TRANS_S1         (1 << 9)
+#define ARM_SMMU_FEAT_TRANS_S2         (1 << 10)
+#define ARM_SMMU_FEAT_STALLS           (1 << 11)
+#define ARM_SMMU_FEAT_HYP              (1 << 12)
+#define ARM_SMMU_FEAT_STALL_FORCE      (1 << 13)
+#define ARM_SMMU_FEAT_VAX              (1 << 14)
+#define ARM_SMMU_FEAT_RANGE_INV                (1 << 15)
+       u32                             features; /* ARM_SMMU_FEAT_* flag bits (defined just above) */
+
+#define ARM_SMMU_OPT_SKIP_PREFETCH     (1 << 0)
+#define ARM_SMMU_OPT_PAGE0_REGS_ONLY   (1 << 1)
+#define ARM_SMMU_OPT_MSIPOLL           (1 << 2)
+       u32                             options; /* ARM_SMMU_OPT_* flag bits (defined just above) */
+
+       struct arm_smmu_cmdq            cmdq;
+       struct arm_smmu_evtq            evtq;
+       struct arm_smmu_priq            priq;
+
+       int                             gerr_irq;
+       int                             combined_irq;
+
+       unsigned long                   ias; /* IPA */
+       unsigned long                   oas; /* PA */
+       unsigned long                   pgsize_bitmap;
+
+#define ARM_SMMU_MAX_ASIDS             (1 << 16)
+       unsigned int                    asid_bits;
+
+#define ARM_SMMU_MAX_VMIDS             (1 << 16)
+       unsigned int                    vmid_bits;
+       DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS); /* bitmap of ARM_SMMU_MAX_VMIDS bits */
+
+       unsigned int                    ssid_bits;
+       unsigned int                    sid_bits;
+
+       struct arm_smmu_strtab_cfg      strtab_cfg;
+
+       /* IOMMU core code handle */
+       struct iommu_device             iommu;
+};
+
+/* SMMU private data for each master */
+struct arm_smmu_master {
+       struct arm_smmu_device          *smmu;
+       struct device                   *dev;
+       struct arm_smmu_domain          *domain;
+       struct list_head                domain_head; /* NOTE(review): presumably linked into arm_smmu_domain.devices - confirm */
+       u32                             *sids; /* NOTE(review): presumably num_sids entries - confirm */
+       unsigned int                    num_sids;
+       bool                            ats_enabled;
+       unsigned int                    ssid_bits;
+};
+
+/* Stage selector, followed by the SMMU private data for an IOMMU domain */
+enum arm_smmu_domain_stage {
+       ARM_SMMU_DOMAIN_S1 = 0,
+       ARM_SMMU_DOMAIN_S2,
+       ARM_SMMU_DOMAIN_NESTED,
+       ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_domain {
+       struct arm_smmu_device          *smmu;
+       struct mutex                    init_mutex; /* Protects smmu pointer */
+
+       struct io_pgtable_ops           *pgtbl_ops;
+       bool                            non_strict;
+       atomic_t                        nr_ats_masters;
+
+       enum arm_smmu_domain_stage      stage;
+       union { /* s1_cfg and s2_cfg share storage; NOTE(review): presumably chosen by 'stage' - confirm */
+               struct arm_smmu_s1_cfg  s1_cfg;
+               struct arm_smmu_s2_cfg  s2_cfg;
+       };
+
+       struct iommu_domain             domain;
+
+       struct list_head                devices;
+       spinlock_t                      devices_lock; /* NOTE(review): presumably guards 'devices' - confirm */
+};
+
+#endif /* _ARM_SMMU_V3_H */