Diffstat (limited to 'hw/i386/intel_iommu.c')
-rw-r--r--  hw/i386/intel_iommu.c  1115
1 file changed, 750 insertions, 365 deletions
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 75f075547f..cc8e59674e 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -49,17 +49,24 @@
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
-#define VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write) {\
- if (ret_fr) { \
- ret_fr = -ret_fr; \
- if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { \
- trace_vtd_fault_disabled(); \
- } else { \
- vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); \
- } \
- goto error; \
- } \
-}
+
+/*
+ * PCI bus number (or SID) is not reliable since the device is usually
+ * initialized before the guest can configure the PCI bridge
+ * (SECONDARY_BUS_NUMBER).
+ */
+struct vtd_as_key {
+ PCIBus *bus;
+ uint8_t devfn;
+ uint32_t pasid;
+};
+
+struct vtd_iotlb_key {
+ uint64_t gfn;
+ uint32_t pasid;
+ uint16_t sid;
+ uint8_t level;
+};
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
@@ -181,6 +188,18 @@ static void vtd_update_scalable_state(IntelIOMMUState *s)
}
}
+static void vtd_update_iq_dw(IntelIOMMUState *s)
+{
+ uint64_t val = vtd_get_quad_raw(s, DMAR_IQA_REG);
+
+ if (s->ecap & VTD_ECAP_SMTS &&
+ val & VTD_IQA_DW_MASK) {
+ s->iq_dw = true;
+ } else {
+ s->iq_dw = false;
+ }
+}
+
/* Whether the address space needs to notify new mappings */
static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
{
@@ -188,14 +207,47 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
}
/* GHashTable functions */
-static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
+static gboolean vtd_iotlb_equal(gconstpointer v1, gconstpointer v2)
+{
+ const struct vtd_iotlb_key *key1 = v1;
+ const struct vtd_iotlb_key *key2 = v2;
+
+ return key1->sid == key2->sid &&
+ key1->pasid == key2->pasid &&
+ key1->level == key2->level &&
+ key1->gfn == key2->gfn;
+}
+
+static guint vtd_iotlb_hash(gconstpointer v)
{
- return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+ const struct vtd_iotlb_key *key = v;
+ uint64_t hash64 = key->gfn | ((uint64_t)(key->sid) << VTD_IOTLB_SID_SHIFT) |
+ (uint64_t)(key->level - 1) << VTD_IOTLB_LVL_SHIFT |
+ (uint64_t)(key->pasid) << VTD_IOTLB_PASID_SHIFT;
+
+ return (guint)((hash64 >> 32) ^ (hash64 & 0xffffffffU));
}
-static guint vtd_uint64_hash(gconstpointer v)
+static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
{
- return (guint)*(const uint64_t *)v;
+ const struct vtd_as_key *key1 = v1;
+ const struct vtd_as_key *key2 = v2;
+
+ return (key1->bus == key2->bus) && (key1->devfn == key2->devfn) &&
+ (key1->pasid == key2->pasid);
+}
+
+/*
+ * Note that we use pointer to PCIBus as the key, so hashing/shifting
+ * based on the pointer value is intended. Note that we deal with
+ * collisions through vtd_as_equal().
+ */
+static guint vtd_as_hash(gconstpointer v)
+{
+ const struct vtd_as_key *key = v;
+ guint value = (guint)(uintptr_t)key->bus;
+
+ return (guint)(value << 8 | key->devfn);
}
static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
@@ -236,22 +288,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
{
VTDAddressSpace *vtd_as;
- VTDBus *vtd_bus;
- GHashTableIter bus_it;
- uint32_t devfn_it;
+ GHashTableIter as_it;
trace_vtd_context_cache_reset();
- g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
+ g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
- while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
- for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
- vtd_as = vtd_bus->dev_as[devfn_it];
- if (!vtd_as) {
- continue;
- }
- vtd_as->context_cache_entry.context_cache_gen = 0;
- }
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
+ vtd_as->context_cache_entry.context_cache_gen = 0;
}
s->context_cache_gen = 1;
}
@@ -278,13 +322,6 @@ static void vtd_reset_caches(IntelIOMMUState *s)
vtd_iommu_unlock(s);
}
-static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
- uint32_t level)
-{
- return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
- ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
-}
-
static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
{
return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
@@ -292,15 +329,17 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
/* Must be called with IOMMU lock held */
static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
- hwaddr addr)
+ uint32_t pasid, hwaddr addr)
{
+ struct vtd_iotlb_key key;
VTDIOTLBEntry *entry;
- uint64_t key;
int level;
for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
- key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
- source_id, level);
+ key.gfn = vtd_get_iotlb_gfn(addr, level);
+ key.level = level;
+ key.sid = source_id;
+ key.pasid = pasid;
entry = g_hash_table_lookup(s->iotlb, &key);
if (entry) {
goto out;
@@ -314,10 +353,11 @@ out:
/* Must be with IOMMU lock held */
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint16_t domain_id, hwaddr addr, uint64_t slpte,
- uint8_t access_flags, uint32_t level)
+ uint8_t access_flags, uint32_t level,
+ uint32_t pasid)
{
VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
- uint64_t *key = g_malloc(sizeof(*key));
+ struct vtd_iotlb_key *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
@@ -331,7 +371,13 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->slpte = slpte;
entry->access_flags = access_flags;
entry->mask = vtd_slpt_level_page_mask(level);
- *key = vtd_get_iotlb_key(gfn, source_id, level);
+ entry->pasid = pasid;
+
+ key->gfn = gfn;
+ key->sid = source_id;
+ key->level = level;
+ key->pasid = pasid;
+
g_hash_table_replace(s->iotlb, key, entry);
}
@@ -351,7 +397,7 @@ static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
trace_vtd_irq_generate(msi.address, msi.data);
- apic_get_class()->send_msi(&msi);
+ apic_get_class(NULL)->send_msi(&msi);
}
/* Generate a fault event to software via MSI if conditions are met.
@@ -423,19 +469,12 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
/* Must not update F field now, should be done later */
static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
- uint16_t source_id, hwaddr addr,
- VTDFaultReason fault, bool is_write)
+ uint64_t hi, uint64_t lo)
{
- uint64_t hi = 0, lo;
hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
assert(index < DMAR_FRCD_REG_NR);
- lo = VTD_FRCD_FI(addr);
- hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
- if (!is_write) {
- hi |= VTD_FRCD_T;
- }
vtd_set_quad_raw(s, frcd_reg_addr, lo);
vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
@@ -461,21 +500,11 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
}
/* Log and report an DMAR (address translation) fault to software */
-static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
- hwaddr addr, VTDFaultReason fault,
- bool is_write)
+static void vtd_report_frcd_fault(IntelIOMMUState *s, uint64_t source_id,
+ uint64_t hi, uint64_t lo)
{
uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
- assert(fault < VTD_FR_MAX);
-
- if (fault == VTD_FR_RESERVED_ERR) {
- /* This is not a normal fault reason case. Drop it. */
- return;
- }
-
- trace_vtd_dmar_fault(source_id, fault, addr, is_write);
-
if (fsts_reg & VTD_FSTS_PFO) {
error_report_once("New fault is not recorded due to "
"Primary Fault Overflow");
@@ -495,7 +524,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
return;
}
- vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
+ vtd_record_frcd(s, s->next_frcd_reg, hi, lo);
if (fsts_reg & VTD_FSTS_PPF) {
error_report_once("There are pending faults already, "
@@ -520,6 +549,40 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
}
}
+/* Log and report a DMAR (address translation) fault to software */
+static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
+ hwaddr addr, VTDFaultReason fault,
+ bool is_write, bool is_pasid,
+ uint32_t pasid)
+{
+ uint64_t hi, lo;
+
+ assert(fault < VTD_FR_MAX);
+
+ trace_vtd_dmar_fault(source_id, fault, addr, is_write);
+
+ lo = VTD_FRCD_FI(addr);
+ hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) |
+ VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid);
+ if (!is_write) {
+ hi |= VTD_FRCD_T;
+ }
+
+ vtd_report_frcd_fault(s, source_id, hi, lo);
+}
+
+
+static void vtd_report_ir_fault(IntelIOMMUState *s, uint64_t source_id,
+ VTDFaultReason fault, uint16_t index)
+{
+ uint64_t hi, lo;
+
+ lo = VTD_FRCD_IR_IDX(index);
+ hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
+
+ vtd_report_frcd_fault(s, source_id, hi, lo);
+}
+
/* Handle Invalidation Queue Errors of queued invalidation interface error
* conditions.
*/
@@ -569,7 +632,8 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
dma_addr_t addr;
addr = s->root + index * sizeof(*re);
- if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
+ if (dma_memory_read(&address_space_memory, addr,
+ re, sizeof(*re), MEMTXATTRS_UNSPECIFIED)) {
re->lo = 0;
return -VTD_FR_ROOT_TABLE_INV;
}
@@ -602,7 +666,8 @@ static int vtd_get_context_entry_from_root(IntelIOMMUState *s,
}
addr = addr + index * ce_size;
- if (dma_memory_read(&address_space_memory, addr, ce, ce_size)) {
+ if (dma_memory_read(&address_space_memory, addr,
+ ce, ce_size, MEMTXATTRS_UNSPECIFIED)) {
return -VTD_FR_CONTEXT_TABLE_INV;
}
@@ -639,8 +704,8 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
assert(index < VTD_SL_PT_ENTRY_NR);
if (dma_memory_read(&address_space_memory,
- base_addr + index * sizeof(slpte), &slpte,
- sizeof(slpte))) {
+ base_addr + index * sizeof(slpte),
+ &slpte, sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
slpte = (uint64_t)-1;
return slpte;
}
@@ -704,10 +769,13 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
index = VTD_PASID_DIR_INDEX(pasid);
entry_size = VTD_PASID_DIR_ENTRY_SIZE;
addr = pasid_dir_base + index * entry_size;
- if (dma_memory_read(&address_space_memory, addr, pdire, entry_size)) {
+ if (dma_memory_read(&address_space_memory, addr,
+ pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
return -VTD_FR_PASID_TABLE_INV;
}
+ pdire->val = le64_to_cpu(pdire->val);
+
return 0;
}
@@ -728,9 +796,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
addr = addr + index * entry_size;
- if (dma_memory_read(&address_space_memory, addr, pe, entry_size)) {
+ if (dma_memory_read(&address_space_memory, addr,
+ pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
return -VTD_FR_PASID_TABLE_INV;
}
+ for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
+ pe->val[i] = le64_to_cpu(pe->val[i]);
+ }
/* Do translation type check */
if (!vtd_pe_type_check(x86_iommu, pe)) {
@@ -796,13 +868,15 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
VTDContextEntry *ce,
- VTDPASIDEntry *pe)
+ VTDPASIDEntry *pe,
+ uint32_t pasid)
{
- uint32_t pasid;
dma_addr_t pasid_dir_base;
int ret = 0;
- pasid = VTD_CE_GET_RID2PASID(ce);
+ if (pasid == PCI_NO_PASID) {
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ }
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
@@ -811,15 +885,17 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
VTDContextEntry *ce,
- bool *pe_fpd_set)
+ bool *pe_fpd_set,
+ uint32_t pasid)
{
int ret;
- uint32_t pasid;
dma_addr_t pasid_dir_base;
VTDPASIDDirEntry pdire;
VTDPASIDEntry pe;
- pasid = VTD_CE_GET_RID2PASID(ce);
+ if (pasid == PCI_NO_PASID) {
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ }
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
/*
@@ -865,12 +941,13 @@ static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
}
static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return VTD_PE_GET_LEVEL(&pe);
}
@@ -883,12 +960,13 @@ static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
}
static uint32_t vtd_get_iova_agaw(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return 30 + ((pe.val[0] >> 2) & VTD_SM_PASID_ENTRY_AW) * 9;
}
@@ -930,31 +1008,33 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
}
static inline uint64_t vtd_iova_limit(IntelIOMMUState *s,
- VTDContextEntry *ce, uint8_t aw)
+ VTDContextEntry *ce, uint8_t aw,
+ uint32_t pasid)
{
- uint32_t ce_agaw = vtd_get_iova_agaw(s, ce);
+ uint32_t ce_agaw = vtd_get_iova_agaw(s, ce, pasid);
return 1ULL << MIN(ce_agaw, aw);
}
/* Return true if IOVA passes range check, otherwise false. */
static inline bool vtd_iova_range_check(IntelIOMMUState *s,
uint64_t iova, VTDContextEntry *ce,
- uint8_t aw)
+ uint8_t aw, uint32_t pasid)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
- return !(iova & ~(vtd_iova_limit(s, ce, aw) - 1));
+ return !(iova & ~(vtd_iova_limit(s, ce, aw, pasid) - 1));
}
static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
}
@@ -965,66 +1045,59 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
* Rsvd field masks for spte:
* vtd_spte_rsvd 4k pages
* vtd_spte_rsvd_large large pages
+ *
+ * We support only 3-level and 4-level page tables (see vtd_init() which
+ * sets only VTD_CAP_SAGAW_39bit and maybe VTD_CAP_SAGAW_48bit bits in s->cap).
*/
-static uint64_t vtd_spte_rsvd[5];
-static uint64_t vtd_spte_rsvd_large[5];
+#define VTD_SPTE_RSVD_LEN 5
+static uint64_t vtd_spte_rsvd[VTD_SPTE_RSVD_LEN];
+static uint64_t vtd_spte_rsvd_large[VTD_SPTE_RSVD_LEN];
static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
{
- uint64_t rsvd_mask = vtd_spte_rsvd[level];
+ uint64_t rsvd_mask;
+
+ /*
+ * We should have caught a guest-mis-programmed level earlier,
+ * via vtd_is_level_supported.
+ */
+ assert(level < VTD_SPTE_RSVD_LEN);
+ /*
+ * Zero level doesn't exist. The smallest level is VTD_SL_PT_LEVEL=1 and
+ * checked by vtd_is_last_slpte().
+ */
+ assert(level);
if ((level == VTD_SL_PD_LEVEL || level == VTD_SL_PDP_LEVEL) &&
(slpte & VTD_SL_PT_PAGE_SIZE_MASK)) {
/* large page */
rsvd_mask = vtd_spte_rsvd_large[level];
+ } else {
+ rsvd_mask = vtd_spte_rsvd[level];
}
return slpte & rsvd_mask;
}
-/* Find the VTD address space associated with a given bus number */
-static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
-{
- VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
- GHashTableIter iter;
-
- if (vtd_bus) {
- return vtd_bus;
- }
-
- /*
- * Iterate over the registered buses to find the one which
- * currently holds this bus number and update the bus_num
- * lookup table.
- */
- g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
- if (pci_bus_num(vtd_bus->bus) == bus_num) {
- s->vtd_as_by_bus_num[bus_num] = vtd_bus;
- return vtd_bus;
- }
- }
-
- return NULL;
-}
-
/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
*/
static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint64_t iova, bool is_write,
uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes, uint8_t aw_bits)
+ bool *reads, bool *writes, uint8_t aw_bits,
+ uint32_t pasid)
{
- dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
- uint32_t level = vtd_get_iova_level(s, ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
+ uint64_t xlat, size;
- if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
- error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
- __func__, iova);
+ if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) {
+ error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
+ "pasid=0x%" PRIx32 ")", __func__, iova, pasid);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@@ -1037,8 +1110,9 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (slpte == (uint64_t)-1) {
error_report_once("%s: detected read error on DMAR slpte "
- "(iova=0x%" PRIx64 ")", __func__, iova);
- if (level == vtd_get_iova_level(s, ce)) {
+ "(iova=0x%" PRIx64 ", pasid=0x%" PRIx32 ")",
+ __func__, iova, pasid);
+ if (level == vtd_get_iova_level(s, ce, pasid)) {
/* Invalid programming of context-entry */
return -VTD_FR_CONTEXT_ENTRY_INV;
} else {
@@ -1050,26 +1124,50 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (!(slpte & access_right_check)) {
error_report_once("%s: detected slpte permission error "
"(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
- "slpte=0x%" PRIx64 ", write=%d)", __func__,
- iova, level, slpte, is_write);
+ "slpte=0x%" PRIx64 ", write=%d, pasid=0x%"
+ PRIx32 ")", __func__, iova, level,
+ slpte, is_write, pasid);
return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
}
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
error_report_once("%s: detected splte reserve non-zero "
"iova=0x%" PRIx64 ", level=0x%" PRIx32
- "slpte=0x%" PRIx64 ")", __func__, iova,
- level, slpte);
+ "slpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
+ __func__, iova, level, slpte, pasid);
return -VTD_FR_PAGING_ENTRY_RSVD;
}
if (vtd_is_last_slpte(slpte, level)) {
*slptep = slpte;
*slpte_level = level;
- return 0;
+ break;
}
addr = vtd_get_slpte_addr(slpte, aw_bits);
level--;
}
+
+ xlat = vtd_get_slpte_addr(*slptep, aw_bits);
+ size = ~vtd_slpt_level_page_mask(level) + 1;
+
+ /*
+ * From VT-d spec 3.14: Untranslated requests and translation
+ * requests that result in an address in the interrupt range will be
+ * blocked with condition code LGN.4 or SGN.8.
+ */
+ if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
+ xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
+ return 0;
+ } else {
+ error_report_once("%s: xlat address is in interrupt range "
+ "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
+ "slpte=0x%" PRIx64 ", write=%d, "
+ "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
+ "pasid=0x%" PRIx32 ")",
+ __func__, iova, level, slpte, is_write,
+ xlat, size, pasid);
+ return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
+ -VTD_FR_INTERRUPT_ADDR;
+ }
}
typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private);
@@ -1105,7 +1203,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info)
.translated_addr = entry->translated_addr,
.perm = entry->perm,
};
- DMAMap *mapped = iova_tree_find(as->iova_tree, &target);
+ const DMAMap *mapped = iova_tree_find(as->iova_tree, &target);
if (event->type == IOMMU_NOTIFIER_UNMAP && !info->notify_unmap) {
trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
@@ -1153,7 +1251,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info)
return ret;
}
/* Drop any existing mapping */
- iova_tree_remove(as->iova_tree, &target);
+ iova_tree_remove(as->iova_tree, target);
/* Recover the correct type */
event->type = IOMMU_NOTIFIER_MAP;
entry->perm = cache_perm;
@@ -1166,7 +1264,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info)
trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
return 0;
}
- iova_tree_remove(as->iova_tree, &target);
+ iova_tree_remove(as->iova_tree, target);
}
trace_vtd_page_walk_one(info->domain_id, entry->iova,
@@ -1280,18 +1378,19 @@ next:
*/
static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce,
uint64_t start, uint64_t end,
- vtd_page_walk_info *info)
+ vtd_page_walk_info *info,
+ uint32_t pasid)
{
- dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
- uint32_t level = vtd_get_iova_level(s, ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
- if (!vtd_iova_range_check(s, start, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(s, end, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) {
/* Fix end so that it reaches the maximum */
- end = vtd_iova_limit(s, ce, info->aw);
+ end = vtd_iova_limit(s, ce, info->aw, pasid);
}
return vtd_page_walk_level(addr, start, end, level, true, true, info);
@@ -1359,7 +1458,7 @@ static int vtd_ce_rid2pasid_check(IntelIOMMUState *s,
* has valid rid2pasid setting, which includes valid
* rid2pasid field and corresponding pasid entry setting
*/
- return vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ return vtd_ce_get_rid2pasid_entry(s, ce, &pe, PCI_NO_PASID);
}
/* Map a device to its corresponding domain (context-entry) */
@@ -1442,12 +1541,13 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event,
}
static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
}
@@ -1465,19 +1565,23 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
.notify_unmap = true,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = vtd_get_domain_id(s, ce),
+ .domain_id = vtd_get_domain_id(s, ce, vtd_as->pasid),
};
- return vtd_page_walk(s, ce, addr, addr + size, &info);
+ return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid);
}
-static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
+static int vtd_address_space_sync(VTDAddressSpace *vtd_as)
{
int ret;
VTDContextEntry ce;
IOMMUNotifier *n;
- if (!(vtd_as->iommu.iommu_notify_flags & IOMMU_NOTIFIER_IOTLB_EVENTS)) {
+ /* If no MAP notifier registered, we simply invalidate all the cache */
+ if (!vtd_as_has_map_notifier(vtd_as)) {
+ IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
+ memory_region_unmap_iommu_notifier_range(n);
+ }
return 0;
}
@@ -1512,19 +1616,38 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
* 1st-level translation or 2nd-level translation, it depends
* on PGTT setting.
*/
-static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
+static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint32_t pasid)
{
- IntelIOMMUState *s;
- VTDContextEntry ce;
VTDPASIDEntry pe;
int ret;
+ if (s->root_scalable) {
+ ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
+ if (ret) {
+ /*
+ * This error is guest triggerable. We should assume PT
+ * is not enabled for safety.
+ */
+ return false;
+ }
+ return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT);
+ }
+
+ return (vtd_ce_get_type(ce) == VTD_CONTEXT_TT_PASS_THROUGH);
+
+}
+
+static bool vtd_as_pt_enabled(VTDAddressSpace *as)
+{
+ IntelIOMMUState *s;
+ VTDContextEntry ce;
+
assert(as);
s = as->iommu_state;
- ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
- as->devfn, &ce);
- if (ret) {
+ if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn,
+ &ce)) {
/*
* Possibly failed to parse the context entry for some reason
* (e.g., during init, or any guest configuration errors on
@@ -1534,29 +1657,20 @@ static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
return false;
}
- if (s->root_scalable) {
- ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe);
- if (ret) {
- error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
- __func__, ret);
- return false;
- }
- return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT);
- }
-
- return (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH);
+ return vtd_dev_pt_enabled(s, &ce, as->pasid);
}
/* Return whether the device is using IOMMU translation. */
static bool vtd_switch_address_space(VTDAddressSpace *as)
{
- bool use_iommu;
+ bool use_iommu, pt;
/* Whether we need to take the BQL on our own */
- bool take_bql = !qemu_mutex_iothread_locked();
+ bool take_bql = !bql_locked();
assert(as);
- use_iommu = as->iommu_state->dmar_enabled && !vtd_dev_pt_enabled(as);
+ use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as);
+ pt = as->iommu_state->dmar_enabled && vtd_as_pt_enabled(as);
trace_vtd_switch_address_space(pci_bus_num(as->bus),
VTD_PCI_SLOT(as->devfn),
@@ -1569,20 +1683,62 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
* it. We'd better make sure we have had it already, or, take it.
*/
if (take_bql) {
- qemu_mutex_lock_iothread();
+ bql_lock();
}
/* Turn off first then on the other */
if (use_iommu) {
memory_region_set_enabled(&as->nodmar, false);
memory_region_set_enabled(MEMORY_REGION(&as->iommu), true);
+ /*
+ * vt-d spec v3.4 3.14:
+ *
+ * """
+ * Requests-with-PASID with input address in range 0xFEEx_xxxx
+ * are translated normally like any other request-with-PASID
+ * through DMA-remapping hardware.
+ * """
+ *
+ * Need to disable interrupt remapping (ir) for address spaces with PASID.
+ */
+ if (as->pasid != PCI_NO_PASID) {
+ memory_region_set_enabled(&as->iommu_ir, false);
+ } else {
+ memory_region_set_enabled(&as->iommu_ir, true);
+ }
} else {
memory_region_set_enabled(MEMORY_REGION(&as->iommu), false);
memory_region_set_enabled(&as->nodmar, true);
}
+ /*
+ * vtd-spec v3.4 3.14:
+ *
+ * """
+ * Requests-with-PASID with input address in range 0xFEEx_xxxx are
+ * translated normally like any other request-with-PASID through
+ * DMA-remapping hardware. However, if such a request is processed
+ * using pass-through translation, it will be blocked as described
+ * in the paragraph below.
+ *
+ * Software must not program paging-structure entries to remap any
+ * address to the interrupt address range. Untranslated requests
+ * and translation requests that result in an address in the
+ * interrupt range will be blocked with condition code LGN.4 or
+ * SGN.8.
+ * """
+ *
+ * We enable per as memory region (iommu_ir_fault) for catching
+ * the translation for interrupt range through PASID + PT.
+ */
+ if (pt && as->pasid != PCI_NO_PASID) {
+ memory_region_set_enabled(&as->iommu_ir_fault, true);
+ } else {
+ memory_region_set_enabled(&as->iommu_ir_fault, false);
+ }
+
if (take_bql) {
- qemu_mutex_unlock_iothread();
+ bql_unlock();
}
return use_iommu;
@@ -1590,24 +1746,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
static void vtd_switch_address_space_all(IntelIOMMUState *s)
{
+ VTDAddressSpace *vtd_as;
GHashTableIter iter;
- VTDBus *vtd_bus;
- int i;
-
- g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
- for (i = 0; i < PCI_DEVFN_MAX; i++) {
- if (!vtd_bus->dev_as[i]) {
- continue;
- }
- vtd_switch_address_space(vtd_bus->dev_as[i]);
- }
- }
-}
-static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
-{
- return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
+ g_hash_table_iter_init(&iter, s->vtd_address_spaces);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
+ vtd_switch_address_space(vtd_as);
+ }
}
static const bool vtd_qualified_faults[] = {
@@ -1621,11 +1766,12 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_PAGING_ENTRY_INV] = true,
[VTD_FR_ROOT_TABLE_INV] = false,
[VTD_FR_CONTEXT_TABLE_INV] = false,
+ [VTD_FR_INTERRUPT_ADDR] = true,
[VTD_FR_ROOT_ENTRY_RSVD] = false,
[VTD_FR_PAGING_ENTRY_RSVD] = true,
[VTD_FR_CONTEXT_ENTRY_TT] = true,
[VTD_FR_PASID_TABLE_INV] = false,
- [VTD_FR_RESERVED_ERR] = false,
+ [VTD_FR_SM_INTERRUPT_ADDR] = true,
[VTD_FR_MAX] = false,
};
@@ -1643,18 +1789,37 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}
+static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ struct vtd_as_key *as_key = (struct vtd_as_key *)key;
+ uint16_t target_sid = *(uint16_t *)user_data;
+ uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn);
+ return sid == target_sid;
+}
+
+static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+{
+ uint8_t bus_num = PCI_BUS_NUM(sid);
+ VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
+
+ if (vtd_as &&
+ (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) {
+ return vtd_as;
+ }
+
+ vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid);
+ s->vtd_as_cache[bus_num] = vtd_as;
+
+ return vtd_as;
+}
+
static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
{
- VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
bool success = false;
- vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
- if (!vtd_bus) {
- goto out;
- }
-
- vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
+ vtd_as = vtd_get_as_by_sid(s, source_id);
if (!vtd_as) {
goto out;
}
@@ -1668,6 +1833,22 @@ out:
trace_vtd_pt_enable_fast_path(source_id, success);
}
+static void vtd_report_fault(IntelIOMMUState *s,
+ int err, bool is_fpd_set,
+ uint16_t source_id,
+ hwaddr addr,
+ bool is_write,
+ bool is_pasid,
+ uint32_t pasid)
+{
+ if (is_fpd_set && vtd_is_qualified_fault(err)) {
+ trace_vtd_fault_disabled();
+ } else {
+ vtd_report_dmar_fault(s, source_id, addr, err, is_write,
+ is_pasid, pasid);
+ }
+}
+
/* Map dev to context-entry then do a paging-structures walk to do a iommu
* translation.
*
@@ -1689,13 +1870,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry;
uint64_t slpte, page_mask;
- uint32_t level;
- uint16_t source_id = vtd_make_source_id(bus_num, devfn);
+ uint32_t level, pasid = vtd_as->pasid;
+ uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
int ret_fr;
bool is_fpd_set = false;
bool reads = true;
bool writes = true;
uint8_t access_flags;
+ bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
VTDIOTLBEntry *iotlb_entry;
/*
@@ -1708,15 +1890,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry = &vtd_as->context_cache_entry;
- /* Try to fetch slpte form IOTLB */
- iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
- if (iotlb_entry) {
- trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
- iotlb_entry->domain_id);
- slpte = iotlb_entry->slpte;
- access_flags = iotlb_entry->access_flags;
- page_mask = iotlb_entry->mask;
- goto out;
+ /* Try to fetch slpte from IOTLB; we don't need RID2PASID logic */
+ if (!rid2pasid) {
+ iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
+ if (iotlb_entry) {
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
+ slpte = iotlb_entry->slpte;
+ access_flags = iotlb_entry->access_flags;
+ page_mask = iotlb_entry->mask;
+ goto out;
+ }
}
/* Try to fetch context-entry from cache first */
@@ -1727,16 +1911,26 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
ce = cc_entry->context_entry;
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
if (!is_fpd_set && s->root_scalable) {
- ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid);
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set,
+ source_id, addr, is_write,
+ false, 0);
+ goto error;
+ }
}
} else {
ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
if (!ret_fr && !is_fpd_set && s->root_scalable) {
- ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid);
+ }
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set,
+ source_id, addr, is_write,
+ false, 0);
+ goto error;
}
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
/* Update context-cache */
trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
cc_entry->context_cache_gen,
@@ -1745,11 +1939,15 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry->context_cache_gen = s->context_cache_gen;
}
+ if (rid2pasid) {
+ pasid = VTD_CE_GET_RID2PASID(&ce);
+ }
+
/*
* We don't need to translate for pass-through context entries.
* Also, let's ignore IOTLB caching as well for PT devices.
*/
- if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
+ if (vtd_dev_pt_enabled(s, &ce, pasid)) {
entry->iova = addr & VTD_PAGE_MASK_4K;
entry->translated_addr = entry->iova;
entry->addr_mask = ~VTD_PAGE_MASK_4K;
@@ -1770,14 +1968,31 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
return true;
}
+ /* Try to fetch slpte from IOTLB for RID2PASID slow path */
+ if (rid2pasid) {
+ iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
+ if (iotlb_entry) {
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
+ slpte = iotlb_entry->slpte;
+ access_flags = iotlb_entry->access_flags;
+ page_mask = iotlb_entry->mask;
+ goto out;
+ }
+ }
+
ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level,
- &reads, &writes, s->aw_bits);
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
+ &reads, &writes, s->aw_bits, pasid);
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
+ addr, is_write, pasid != PCI_NO_PASID, pasid);
+ goto error;
+ }
page_mask = vtd_slpt_level_page_mask(level);
access_flags = IOMMU_ACCESS_FLAG(reads, writes);
- vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce), addr, slpte,
- access_flags, level);
+ vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid),
+ addr, slpte, access_flags, level, pasid);
out:
vtd_iommu_unlock(s);
entry->iova = addr & page_mask;
@@ -1830,7 +2045,7 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
VTDAddressSpace *vtd_as;
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
- vtd_sync_shadow_page_table(vtd_as);
+ vtd_address_space_sync(vtd_as);
}
}
@@ -1862,11 +2077,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
uint16_t source_id,
uint16_t func_mask)
{
+ GHashTableIter as_it;
uint16_t mask;
- VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
uint8_t bus_n, devfn;
- uint16_t devfn_it;
trace_vtd_inv_desc_cc_devices(source_id, func_mask);
@@ -1889,32 +2103,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
mask = ~mask;
bus_n = VTD_SID_TO_BUS(source_id);
- vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
- if (vtd_bus) {
- devfn = VTD_SID_TO_DEVFN(source_id);
- for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
- vtd_as = vtd_bus->dev_as[devfn_it];
- if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
- trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
- VTD_PCI_FUNC(devfn_it));
- vtd_iommu_lock(s);
- vtd_as->context_cache_entry.context_cache_gen = 0;
- vtd_iommu_unlock(s);
- /*
- * Do switch address space when needed, in case if the
- * device passthrough bit is switched.
- */
- vtd_switch_address_space(vtd_as);
- /*
- * So a device is moving out of (or moving into) a
- * domain, resync the shadow page table.
- * This won't bring bad even if we have no such
- * notifier registered - the IOMMU notification
- * framework will skip MAP notifications if that
- * happened.
- */
- vtd_sync_shadow_page_table(vtd_as);
- }
+ devfn = VTD_SID_TO_DEVFN(source_id);
+
+ g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
+ if ((pci_bus_num(vtd_as->bus) == bus_n) &&
+ (vtd_as->devfn & mask) == (devfn & mask)) {
+ trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
+ VTD_PCI_FUNC(vtd_as->devfn));
+ vtd_iommu_lock(s);
+ vtd_as->context_cache_entry.context_cache_gen = 0;
+ vtd_iommu_unlock(s);
+ /*
+ * Do switch address space when needed, in case if the
+ * device passthrough bit is switched.
+ */
+ vtd_switch_address_space(vtd_as);
+ /*
+ * So a device is moving out of (or moving into) a
+ * domain, resync the shadow page table.
+ * This won't bring bad even if we have no such
+ * notifier registered - the IOMMU notification
+ * framework will skip MAP notifications if that
+ * happened.
+ */
+ vtd_address_space_sync(vtd_as);
}
}
}
@@ -1971,15 +2184,15 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
- domain_id == vtd_get_domain_id(s, &ce)) {
- vtd_sync_shadow_page_table(vtd_as);
+ domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+ vtd_address_space_sync(vtd_as);
}
}
}
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
- uint8_t am)
+ uint8_t am, uint32_t pasid)
{
VTDAddressSpace *vtd_as;
VTDContextEntry ce;
@@ -1987,9 +2200,12 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
hwaddr size = (1 << am) * VTD_PAGE_SIZE;
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
+ if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) {
+ continue;
+ }
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
- if (!ret && domain_id == vtd_get_domain_id(s, &ce)) {
+ if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
if (vtd_as_has_map_notifier(vtd_as)) {
/*
* As long as we have MAP notifications registered in
@@ -2033,7 +2249,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
vtd_iommu_unlock(s);
- vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
+ vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID);
}
/* Flush IOTLB
@@ -2197,12 +2413,13 @@ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
/* Handle write to Global Command Register */
static void vtd_handle_gcmd_write(IntelIOMMUState *s)
{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
uint32_t changed = status ^ val;
trace_vtd_reg_write_gcmd(status, val);
- if (changed & VTD_GCMD_TE) {
+ if ((changed & VTD_GCMD_TE) && s->dma_translation) {
/* Translation enable/disable */
vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
}
@@ -2218,7 +2435,8 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
/* Set/update the interrupt remapping root-table pointer */
vtd_handle_gcmd_sirtp(s);
}
- if (changed & VTD_GCMD_IRE) {
+ if ((changed & VTD_GCMD_IRE) &&
+ x86_iommu_ir_supported(x86_iommu)) {
/* Interrupt remap enable/disable */
vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
}
@@ -2275,7 +2493,8 @@ static bool vtd_get_inv_desc(IntelIOMMUState *s,
uint32_t dw = s->iq_dw ? 32 : 16;
dma_addr_t addr = base_addr + offset * dw;
- if (dma_memory_read(&address_space_memory, addr, inv_desc, dw)) {
+ if (dma_memory_read(&address_space_memory, addr,
+ inv_desc, dw, MEMTXATTRS_UNSPECIFIED)) {
error_report_once("Read INV DESC failed.");
return false;
}
@@ -2308,8 +2527,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
dma_addr_t status_addr = inv_desc->hi;
trace_vtd_inv_desc_wait_sw(status_addr, status_data);
status_data = cpu_to_le32(status_data);
- if (dma_memory_write(&address_space_memory, status_addr, &status_data,
- sizeof(status_data))) {
+ if (dma_memory_write(&address_space_memory, status_addr,
+ &status_data, sizeof(status_data),
+ MEMTXATTRS_UNSPECIFIED)) {
trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
return false;
}
@@ -2426,18 +2646,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
{
VTDAddressSpace *vtd_dev_as;
IOMMUTLBEvent event;
- struct VTDBus *vtd_bus;
hwaddr addr;
uint64_t sz;
uint16_t sid;
- uint8_t devfn;
bool size;
- uint8_t bus_num;
addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
- devfn = sid & 0xff;
- bus_num = sid >> 8;
size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
@@ -2448,12 +2663,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
return false;
}
- vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
- if (!vtd_bus) {
- goto done;
- }
-
- vtd_dev_as = vtd_bus->dev_as[devfn];
+ /*
+ * Using sid is OK since the guest should have finished the
+ * initialization of both the bus and device.
+ */
+ vtd_dev_as = vtd_get_as_by_sid(s, sid);
if (!vtd_dev_as) {
goto done;
}
@@ -2869,12 +3083,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
} else {
vtd_set_quad(s, addr, val);
}
- if (s->ecap & VTD_ECAP_SMTS &&
- val & VTD_IQA_DW_MASK) {
- s->iq_dw = true;
- } else {
- s->iq_dw = false;
- }
+ vtd_update_iq_dw(s);
break;
case DMAR_IQA_REG_HI:
@@ -3015,6 +3224,28 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ /* TODO: add support for VFIO and vhost users */
+ if (s->snoop_control) {
+ error_setg_errno(errp, ENOTSUP,
+ "Snoop Control with vhost or VFIO is not supported");
+ return -ENOTSUP;
+ }
+ if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) {
+ error_setg_errno(errp, ENOTSUP,
+ "device %02x.%02x.%x requires caching mode",
+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
+ PCI_FUNC(vtd_as->devfn));
+ return -ENOTSUP;
+ }
+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
+ error_setg_errno(errp, ENOTSUP,
+ "device %02x.%02x.%x requires device IOTLB mode",
+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
+ PCI_FUNC(vtd_as->devfn));
+ return -ENOTSUP;
+ }
/* Update per-address-space notifier flags */
vtd_as->notifier_flags = new;
@@ -3032,13 +3263,6 @@ static int vtd_post_load(void *opaque, int version_id)
IntelIOMMUState *iommu = opaque;
/*
- * Memory regions are dynamically turned on/off depending on
- * context entry configurations from the guest. After migration,
- * we need to make sure the memory regions are still correct.
- */
- vtd_switch_address_space_all(iommu);
-
- /*
* We don't need to migrate the root_scalable because we can
* simply do the calculation after the loading is complete. We
* can actually do similar things with root, dmar_enabled, etc.
@@ -3047,6 +3271,15 @@ static int vtd_post_load(void *opaque, int version_id)
*/
vtd_update_scalable_state(iommu);
+ vtd_update_iq_dw(iommu);
+
+ /*
+ * Memory regions are dynamically turned on/off depending on
+ * context entry configurations from the guest. After migration,
+ * we need to make sure the memory regions are still correct.
+ */
+ vtd_switch_address_space_all(iommu);
+
return 0;
}
@@ -3056,7 +3289,7 @@ static const VMStateDescription vtd_vmstate = {
.minimum_version_id = 1,
.priority = MIG_PRI_IOMMU,
.post_load = vtd_post_load,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT64(root, IntelIOMMUState),
VMSTATE_UINT64(intr_root, IntelIOMMUState),
VMSTATE_UINT64(iq, IntelIOMMUState),
@@ -3099,13 +3332,17 @@ static Property vtd_properties[] = {
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
+ DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
+ DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
+ DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_END_OF_LIST(),
};
/* Read IRTE entry with specific index */
-static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
- VTD_IR_TableEntry *entry, uint16_t sid)
+static bool vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
+ VTD_IR_TableEntry *entry, uint16_t sid,
+ bool do_fault)
{
static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
{0xffff, 0xfffb, 0xfff9, 0xfff8};
@@ -3116,40 +3353,62 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
if (index >= iommu->intr_size) {
error_report_once("%s: index too large: ind=0x%x",
__func__, index);
- return -VTD_FR_IR_INDEX_OVER;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_INDEX_OVER, index);
+ }
+ return false;
}
addr = iommu->intr_root + index * sizeof(*entry);
- if (dma_memory_read(&address_space_memory, addr, entry,
- sizeof(*entry))) {
+ if (dma_memory_read(&address_space_memory, addr,
+ entry, sizeof(*entry), MEMTXATTRS_UNSPECIFIED)) {
error_report_once("%s: read failed: ind=0x%x addr=0x%" PRIx64,
__func__, index, addr);
- return -VTD_FR_IR_ROOT_INVAL;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ROOT_INVAL, index);
+ }
+ return false;
}
- trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
- le64_to_cpu(entry->data[0]));
+ entry->data[0] = le64_to_cpu(entry->data[0]);
+ entry->data[1] = le64_to_cpu(entry->data[1]);
+
+ trace_vtd_ir_irte_get(index, entry->data[1], entry->data[0]);
+
+ /*
+ * The remaining potential fault conditions are "qualified" by the
+ * Fault Processing Disable bit in the IRTE. Even "not present".
+ * So just clear the do_fault flag if PFD is set, which will
+ * prevent faults being raised.
+ */
+ if (entry->irte.fault_disable) {
+ do_fault = false;
+ }
if (!entry->irte.present) {
error_report_once("%s: detected non-present IRTE "
"(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
- __func__, index, le64_to_cpu(entry->data[1]),
- le64_to_cpu(entry->data[0]));
- return -VTD_FR_IR_ENTRY_P;
+ __func__, index, entry->data[1], entry->data[0]);
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ENTRY_P, index);
+ }
+ return false;
}
if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
entry->irte.__reserved_2) {
error_report_once("%s: detected non-zero reserved IRTE "
"(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
- __func__, index, le64_to_cpu(entry->data[1]),
- le64_to_cpu(entry->data[0]));
- return -VTD_FR_IR_IRTE_RSVD;
+ __func__, index, entry->data[1], entry->data[0]);
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_IRTE_RSVD, index);
+ }
+ return false;
}
if (sid != X86_IOMMU_SID_INVALID) {
/* Validate IRTE SID */
- source_id = le32_to_cpu(entry->irte.source_id);
+ source_id = entry->irte.source_id;
switch (entry->irte.sid_vtype) {
case VTD_SVT_NONE:
break;
@@ -3160,7 +3419,10 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
error_report_once("%s: invalid IRTE SID "
"(index=%u, sid=%u, source_id=%u)",
__func__, index, sid, source_id);
- return -VTD_FR_IR_SID_ERR;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
+ }
+ return false;
}
break;
@@ -3172,7 +3434,10 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
error_report_once("%s: invalid SVT_BUS "
"(index=%u, bus=%u, min=%u, max=%u)",
__func__, index, bus, bus_min, bus_max);
- return -VTD_FR_IR_SID_ERR;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
+ }
+ return false;
}
break;
@@ -3181,29 +3446,30 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
"(index=%u, type=%d)", __func__,
index, entry->irte.sid_vtype);
/* Take this as verification failure. */
- return -VTD_FR_IR_SID_ERR;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
+ }
+ return false;
}
}
- return 0;
+ return true;
}
/* Fetch IRQ information of specific IR index */
-static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
- X86IOMMUIrq *irq, uint16_t sid)
+static bool vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
+ X86IOMMUIrq *irq, uint16_t sid, bool do_fault)
{
VTD_IR_TableEntry irte = {};
- int ret = 0;
- ret = vtd_irte_get(iommu, index, &irte, sid);
- if (ret) {
- return ret;
+ if (!vtd_irte_get(iommu, index, &irte, sid, do_fault)) {
+ return false;
}
irq->trigger_mode = irte.irte.trigger_mode;
irq->vector = irte.irte.vector;
irq->delivery_mode = irte.irte.delivery_mode;
- irq->dest = le32_to_cpu(irte.irte.dest_id);
+ irq->dest = irte.irte.dest_id;
if (!iommu->intr_eime) {
#define VTD_IR_APIC_DEST_MASK (0xff00ULL)
#define VTD_IR_APIC_DEST_SHIFT (8)
@@ -3216,16 +3482,15 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
trace_vtd_ir_remap(index, irq->trigger_mode, irq->vector,
irq->delivery_mode, irq->dest, irq->dest_mode);
- return 0;
+ return true;
}
/* Interrupt remapping for MSI/MSI-X entry */
static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
MSIMessage *origin,
MSIMessage *translated,
- uint16_t sid)
+ uint16_t sid, bool do_fault)
{
- int ret = 0;
VTD_IR_MSIAddress addr;
uint16_t index;
X86IOMMUIrq irq = {};
@@ -3242,14 +3507,20 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
if (origin->address & VTD_MSI_ADDR_HI_MASK) {
error_report_once("%s: MSI address high 32 bits non-zero detected: "
"address=0x%" PRIx64, __func__, origin->address);
- return -VTD_FR_IR_REQ_RSVD;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
+ }
+ return -EINVAL;
}
addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
if (addr.addr.__head != 0xfee) {
error_report_once("%s: MSI address low 32 bit invalid: 0x%" PRIx32,
__func__, addr.data);
- return -VTD_FR_IR_REQ_RSVD;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
+ }
+ return -EINVAL;
}
/* This is compatible mode. */
@@ -3258,7 +3529,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
goto out;
}
- index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
+ index = addr.addr.index_h << 15 | addr.addr.index_l;
#define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff)
#define VTD_IR_MSI_DATA_RESERVED (0xffff0000)
@@ -3268,9 +3539,8 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
}
- ret = vtd_remap_irq_get(iommu, index, &irq, sid);
- if (ret) {
- return ret;
+ if (!vtd_remap_irq_get(iommu, index, &irq, sid, do_fault)) {
+ return -EINVAL;
}
if (addr.addr.sub_valid) {
@@ -3280,7 +3550,10 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
"(sid=%u, address=0x%" PRIx64
", data=0x%" PRIx32 ")",
__func__, sid, origin->address, origin->data);
- return -VTD_FR_IR_REQ_RSVD;
+ if (do_fault) {
+ vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
+ }
+ return -EINVAL;
}
} else {
uint8_t vector = origin->data & 0xff;
@@ -3320,7 +3593,7 @@ static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
MSIMessage *dst, uint16_t sid)
{
return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
- src, dst, sid);
+ src, dst, sid, false);
}
static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
@@ -3346,14 +3619,13 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
sid = attrs.requester_id;
}
- ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
+ ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid, true);
if (ret) {
- /* TODO: report error */
/* Drop this interrupt */
return MEMTX_ERROR;
}
- apic_get_class()->send_msi(&to);
+ apic_get_class(NULL)->send_msi(&to);
return MEMTX_OK;
}
@@ -3372,32 +3644,98 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
},
};
-VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
+static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
+ hwaddr addr, bool is_write)
{
- uintptr_t key = (uintptr_t)bus;
- VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
- VTDAddressSpace *vtd_dev_as;
- char name[128];
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ uint8_t bus_n = pci_bus_num(vtd_as->bus);
+ uint16_t sid = PCI_BUILD_BDF(bus_n, vtd_as->devfn);
+ bool is_fpd_set = false;
+ VTDContextEntry ce;
+
+ assert(vtd_as->pasid != PCI_NO_PASID);
- if (!vtd_bus) {
- uintptr_t *new_key = g_malloc(sizeof(*new_key));
- *new_key = (uintptr_t)bus;
- /* No corresponding free() */
- vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
- PCI_DEVFN_MAX);
- vtd_bus->bus = bus;
- g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
+ /* Try our best to fetch FPD; we can't do anything more */
+ if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
+ is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+ if (!is_fpd_set && s->root_scalable) {
+ vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+ }
}
- vtd_dev_as = vtd_bus->dev_as[devfn];
+ vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR,
+ is_fpd_set, sid, addr, is_write,
+ true, vtd_as->pasid);
+}
+
+static MemTxResult vtd_mem_ir_fault_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size,
+ MemTxAttrs attrs)
+{
+ vtd_report_ir_illegal_access(opaque, addr, false);
+
+ return MEMTX_ERROR;
+}
+
+static MemTxResult vtd_mem_ir_fault_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size,
+ MemTxAttrs attrs)
+{
+ vtd_report_ir_illegal_access(opaque, addr, true);
+
+ return MEMTX_ERROR;
+}
+
+static const MemoryRegionOps vtd_mem_ir_fault_ops = {
+ .read_with_attrs = vtd_mem_ir_fault_read,
+ .write_with_attrs = vtd_mem_ir_fault_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+};
+
+VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
+ int devfn, unsigned int pasid)
+{
+ /*
+ * We can't simply use sid here since the bus number might not be
+ * initialized by the guest.
+ */
+ struct vtd_as_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ .pasid = pasid,
+ };
+ VTDAddressSpace *vtd_dev_as;
+ char name[128];
+ vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key);
if (!vtd_dev_as) {
- snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
- PCI_FUNC(devfn));
- vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
+ struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
+
+ new_key->bus = bus;
+ new_key->devfn = devfn;
+ new_key->pasid = pasid;
+
+ if (pasid == PCI_NO_PASID) {
+ snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
+ PCI_FUNC(devfn));
+ } else {
+ snprintf(name, sizeof(name), "vtd-%02x.%x-pasid-%x", PCI_SLOT(devfn),
+ PCI_FUNC(devfn), pasid);
+ }
+
+ vtd_dev_as = g_new0(VTDAddressSpace, 1);
vtd_dev_as->bus = bus;
vtd_dev_as->devfn = (uint8_t)devfn;
+ vtd_dev_as->pasid = pasid;
vtd_dev_as->iommu_state = s;
vtd_dev_as->context_cache_entry.context_cache_gen = 0;
vtd_dev_as->iova_tree = iova_tree_new();
@@ -3439,6 +3777,24 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
&vtd_dev_as->iommu_ir, 1);
/*
+ * This region is used for catching fault to access interrupt
+ * range via passthrough + PASID. See also
+ * vtd_switch_address_space(). We can't use alias since we
+ * need to know the sid which is valid for MSI who uses
+ * bus_master_as (see msi_send_message()).
+ */
+ memory_region_init_io(&vtd_dev_as->iommu_ir_fault, OBJECT(s),
+ &vtd_mem_ir_fault_ops, vtd_dev_as, "vtd-no-ir",
+ VTD_INTERRUPT_ADDR_SIZE);
+ /*
+ * Hook to root since when PT is enabled vtd_dev_as->iommu
+ * will be disabled.
+ */
+ memory_region_add_subregion_overlap(MEMORY_REGION(&vtd_dev_as->root),
+ VTD_INTERRUPT_ADDR_FIRST,
+ &vtd_dev_as->iommu_ir_fault, 2);
+
+ /*
* Hook both the containers under the root container, we
* switch between DMAR & noDMAR by enable/disable
* corresponding sub-containers
@@ -3450,6 +3806,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
&vtd_dev_as->nodmar, 0);
vtd_switch_address_space(vtd_dev_as);
+
+ g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as);
}
return vtd_dev_as;
}
@@ -3457,7 +3815,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
/* Unmap the whole range in the notifier's scope. */
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
{
- hwaddr size, remain;
+ hwaddr total, remain;
hwaddr start = n->start;
hwaddr end = n->end;
IntelIOMMUState *s = as->iommu_state;
@@ -3478,7 +3836,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
}
assert(start <= end);
- size = remain = end - start + 1;
+ total = remain = end - start + 1;
while (remain >= VTD_PAGE_SIZE) {
IOMMUTLBEvent event;
@@ -3506,11 +3864,11 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
VTD_PCI_SLOT(as->devfn),
VTD_PCI_FUNC(as->devfn),
- n->start, size);
+ n->start, total);
map.iova = n->start;
- map.size = size;
- iova_tree_remove(as->iova_tree, &map);
+ map.size = total - 1; /* Inclusive */
+ iova_tree_remove(as->iova_tree, map);
}
static void vtd_address_space_unmap_all(IntelIOMMUState *s)
@@ -3543,22 +3901,19 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
IntelIOMMUState *s = vtd_as->iommu_state;
uint8_t bus_n = pci_bus_num(vtd_as->bus);
VTDContextEntry ce;
+ DMAMap map = { .iova = 0, .size = HWADDR_MAX };
- /*
- * The replay can be triggered by either a invalidation or a newly
- * created entry. No matter what, we release existing mappings
- * (it means flushing caches for UNMAP-only registers).
- */
- vtd_address_space_unmap(vtd_as, n);
+ /* replay is protected by BQL, page walk will re-setup it safely */
+ iova_tree_remove(vtd_as->iova_tree, map);
if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
trace_vtd_replay_ce_valid(s->root_scalable ? "scalable mode" :
"legacy mode",
bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn),
- vtd_get_domain_id(s, &ce),
+ vtd_get_domain_id(s, &ce, vtd_as->pasid),
ce.hi, ce.lo);
- if (vtd_as_has_map_notifier(vtd_as)) {
+ if (n->notifier_flags & IOMMU_NOTIFIER_MAP) {
/* This is required only for MAP typed notifiers */
vtd_page_walk_info info = {
.hook_fn = vtd_replay_hook,
@@ -3566,10 +3921,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
.notify_unmap = false,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = vtd_get_domain_id(s, &ce),
+ .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid),
};
- vtd_page_walk(s, &ce, 0, ~0ULL, &info);
+ vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid);
}
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
@@ -3605,12 +3960,17 @@ static void vtd_init(IntelIOMMUState *s)
s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
- VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
+ VTD_CAP_MGAW(s->aw_bits);
if (s->dma_drain) {
s->cap |= VTD_CAP_DRAIN;
}
- if (s->aw_bits == VTD_HOST_AW_48BIT) {
- s->cap |= VTD_CAP_SAGAW_48bit;
+ if (s->dma_translation) {
+ if (s->aw_bits >= VTD_HOST_AW_39BIT) {
+ s->cap |= VTD_CAP_SAGAW_39bit;
+ }
+ if (s->aw_bits >= VTD_HOST_AW_48BIT) {
+ s->cap |= VTD_CAP_SAGAW_48bit;
+ }
}
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
@@ -3629,6 +3989,12 @@ static void vtd_init(IntelIOMMUState *s)
vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
x86_iommu->dt_supported);
+ if (s->scalable_mode || s->snoop_control) {
+ vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
+ }
+
if (x86_iommu_ir_supported(x86_iommu)) {
s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
if (s->intr_eim == ON_OFF_AUTO_ON) {
@@ -3654,6 +4020,14 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
+ if (s->snoop_control) {
+ s->ecap |= VTD_ECAP_SC;
+ }
+
+ if (s->pasid) {
+ s->ecap |= VTD_ECAP_PASID;
+ }
+
vtd_reset_caches(s);
/* Define registers with default values and bit semantics */
@@ -3727,10 +4101,14 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
- vtd_as = vtd_find_add_as(s, bus, devfn);
+ vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID);
return &vtd_as->as;
}
+static PCIIOMMUOps vtd_iommu_ops = {
+ .get_address_space = vtd_host_dma_iommu,
+};
+
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -3746,11 +4124,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
- if (!kvm_irqchip_in_kernel()) {
- error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
- return false;
- }
- if (!kvm_enable_x2apic()) {
+ if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
error_setg(errp, "eim=on requires support on the KVM side"
"(X2APIC_API, first shipped in v4.7)");
return false;
@@ -3770,6 +4144,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return false;
}
+ if (s->pasid && !s->scalable_mode) {
+ error_setg(errp, "Need to set scalable mode for PASID");
+ return false;
+ }
+
return true;
}
@@ -3804,11 +4183,19 @@ static void vtd_realize(DeviceState *dev, Error **errp)
MachineState *ms = MACHINE(qdev_get_machine());
PCMachineState *pcms = PC_MACHINE(ms);
X86MachineState *x86ms = X86_MACHINE(ms);
- PCIBus *bus = pcms->bus;
+ PCIBus *bus = pcms->pcibus;
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
- x86_iommu->type = TYPE_INTEL;
+ if (s->pasid && x86_iommu->dt_supported) {
+ /*
+ * PASID-based-Device-TLB Invalidate Descriptor is not
+ * implemented and it requires support from vhost layer which
+ * needs to be implemented in the future.
+ */
+ error_setg(errp, "PASID based device IOTLB is not supported");
+ return;
+ }
if (!vtd_decide_config(s, errp)) {
return;
@@ -3816,9 +4203,10 @@ static void vtd_realize(DeviceState *dev, Error **errp)
QLIST_INIT(&s->vtd_as_with_notifiers);
qemu_mutex_init(&s->iommu_lock);
- memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
+ memory_region_add_subregion(get_system_memory(),
+ Q35_HOST_BRIDGE_IOMMU_ADDR, &s->csrmem);
/* Create the shared memory regions by all devices */
memory_region_init(&s->mr_nodmar, OBJECT(s), "vtd-nodmar",
@@ -3833,16 +4221,13 @@ static void vtd_realize(DeviceState *dev, Error **errp)
memory_region_add_subregion_overlap(&s->mr_nodmar,
VTD_INTERRUPT_ADDR_FIRST,
&s->mr_ir, 1);
-
- sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
/* No corresponding destroy */
- s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
+ s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal,
g_free, g_free);
- s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
- g_free, g_free);
+ s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
+ g_free, g_free);
vtd_init(s);
- sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
- pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
+ pci_setup_iommu(bus, &vtd_iommu_ops, dev);
/* Pseudo address space under root PCI bus. */
x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);