-rw-r--r--  drivers/iommu/Kconfig             15
-rw-r--r--  drivers/iommu/dma-mapping-fast.c  44
-rw-r--r--  drivers/iommu/io-pgtable-fast.c   77
-rw-r--r--  include/linux/dma-mapping-fast.h   1
-rw-r--r--  include/linux/io-pgtable-fast.h   33
5 files changed, 163 insertions(+), 7 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index fff7785726ae..f64111886584 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -60,6 +60,21 @@ config IOMMU_IO_PGTABLE_FAST_SELFTEST
If unsure, say N here.
+config IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+ bool "Prove correctness of TLB maintenance in the Fast DMA mapper"
+ depends on IOMMU_IO_PGTABLE_FAST
+ help
+ Enables some debug features that help prove correctness of TLB
+ maintenance routines in the Fast DMA mapper. This option will
+ slow things down considerably, so should only be used in a debug
+ configuration. This relies on the ability to set bits in an
+ invalid page table entry, which is disallowed on some hardware
+ due to errata. If you're running on such a platform, this
+ option can only be used with unit tests, as it will break real
+ use cases.
+
+ If unsure, say N here.
+
endmenu
config IOMMU_IOVA
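
For reference, a config fragment exercising the new option might look like the
following (hypothetical; IOMMU_IO_PGTABLE_FAST_SELFTEST is only needed if you
also want the built-in unit tests, which are the only safe way to run with
PROVE_TLB on platforms with the PTE erratum mentioned in the help text):

    CONFIG_IOMMU_IO_PGTABLE_FAST=y
    CONFIG_IOMMU_IO_PGTABLE_FAST_SELFTEST=y
    CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB=y
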
diff --git a/drivers/iommu/dma-mapping-fast.c b/drivers/iommu/dma-mapping-fast.c
index 4ee6e925dc08..342c5cd24257 100644
--- a/drivers/iommu/dma-mapping-fast.c
+++ b/drivers/iommu/dma-mapping-fast.c
@@ -80,6 +80,7 @@ static bool __bit_covered_stale(unsigned long upcoming_stale,
}
static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
+ struct dma_attrs *attrs,
size_t size)
{
unsigned long bit, prev_search_start, nbits = size >> FAST_PAGE_SHIFT;
@@ -114,8 +115,11 @@ static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
__bit_covered_stale(mapping->upcoming_stale_bit,
prev_search_start,
bit + nbits - 1)) {
+ bool skip_sync = dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs);
+
iommu_tlbiall(mapping->domain);
mapping->have_stale_tlbs = false;
+ av8l_fast_clear_stale_ptes(mapping->pgtbl_pmds, skip_sync);
}
return (bit << FAST_PAGE_SHIFT) + mapping->base;
@@ -287,7 +291,7 @@ static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page,
spin_lock_irqsave(&mapping->lock, flags);
- iova = __fast_smmu_alloc_iova(mapping, len);
+ iova = __fast_smmu_alloc_iova(mapping, attrs, len);
if (unlikely(iova == DMA_ERROR_CODE))
goto fail;
@@ -427,7 +431,7 @@ static void *fast_smmu_alloc(struct device *dev, size_t size,
}
spin_lock_irqsave(&mapping->lock, flags);
- dma_addr = __fast_smmu_alloc_iova(mapping, size);
+ dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
if (dma_addr == DMA_ERROR_CODE) {
dev_err(dev, "no iova\n");
spin_unlock_irqrestore(&mapping->lock, flags);
@@ -519,6 +523,39 @@ static int fast_smmu_mapping_error(struct device *dev,
return dma_addr == DMA_ERROR_CODE;
}
+static void __fast_smmu_mapped_over_stale(struct dma_fast_smmu_mapping *fast,
+ void *data)
+{
+ av8l_fast_iopte *ptep = data;
+ dma_addr_t iova;
+ unsigned long bitmap_idx;
+
+ bitmap_idx = (unsigned long)(ptep - fast->pgtbl_pmds);
+ iova = bitmap_idx << FAST_PAGE_SHIFT;
+ dev_err(fast->dev, "Mapped over stale TLB at %pa\n", &iova);
+ dev_err(fast->dev, "bitmap (failure at idx %lu):\n", bitmap_idx);
+ dev_err(fast->dev, "ptep: %p pmds: %p diff: %td\n", ptep,
+ fast->pgtbl_pmds, ptep - fast->pgtbl_pmds);
+ print_hex_dump(KERN_ERR, "bmap: ", DUMP_PREFIX_ADDRESS,
+ 32, 8, fast->bitmap, fast->bitmap_size, false);
+}
+
+static int fast_smmu_notify(struct notifier_block *self,
+ unsigned long action, void *data)
+{
+ struct dma_fast_smmu_mapping *fast = container_of(
+ self, struct dma_fast_smmu_mapping, notifier);
+
+ switch (action) {
+ case MAPPED_OVER_STALE_TLB:
+ __fast_smmu_mapped_over_stale(fast, data);
+ return NOTIFY_OK;
+ default:
+ WARN(1, "Unhandled notifier action");
+ return NOTIFY_DONE;
+ }
+}
+
static const struct dma_map_ops fast_smmu_dma_ops = {
.alloc = fast_smmu_alloc,
.free = fast_smmu_free,
@@ -620,6 +657,9 @@ int fast_smmu_attach_device(struct device *dev,
}
mapping->fast->pgtbl_pmds = info.pmds;
+ mapping->fast->notifier.notifier_call = fast_smmu_notify;
+ av8l_register_notify(&mapping->fast->notifier);
+
dev->archdata.mapping = mapping;
set_dma_ops(dev, &fast_smmu_dma_ops);
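
The skip_sync plumbing above is driven entirely by DMA attributes supplied by
the caller. A minimal sketch of such a caller, using the pre-4.8 struct
dma_attrs API that this tree uses (map_skip_sync is a hypothetical helper, not
part of this patch):

    #include <linux/dma-attrs.h>
    #include <linux/dma-mapping.h>

    /*
     * Hypothetical caller: map with DMA_ATTR_SKIP_CPU_SYNC so that, if the
     * allocation trips a TLBIALL inside __fast_smmu_alloc_iova(), the
     * stale-PTE sweep also skips its dmac_clean_range() calls.
     */
    static dma_addr_t map_skip_sync(struct device *dev, void *buf, size_t len)
    {
            DEFINE_DMA_ATTRS(attrs);

            dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
            return dma_map_single_attrs(dev, buf, len, DMA_TO_DEVICE, &attrs);
    }
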
diff --git a/drivers/iommu/io-pgtable-fast.c b/drivers/iommu/io-pgtable-fast.c
index 141b66ce207a..44307f67fd22 100644
--- a/drivers/iommu/io-pgtable-fast.c
+++ b/drivers/iommu/io-pgtable-fast.c
@@ -140,6 +140,53 @@ struct av8l_fast_io_pgtable {
#define AV8L_FAST_PAGE_SHIFT 12
+#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+
+#include <asm/cacheflush.h>
+#include <linux/notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(av8l_notifier_list);
+
+void av8l_register_notify(struct notifier_block *nb)
+{
+ atomic_notifier_chain_register(&av8l_notifier_list, nb);
+}
+EXPORT_SYMBOL(av8l_register_notify);
+
+static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
+{
+ if (unlikely(*ptep)) {
+ atomic_notifier_call_chain(
+ &av8l_notifier_list, MAPPED_OVER_STALE_TLB,
+ (void *) ptep);
+ pr_err("Tried to map over a non-vacant pte: 0x%llx @ %p\n",
+ *ptep, ptep);
+ pr_err("Nearby memory:\n");
+ print_hex_dump(KERN_ERR, "pgtbl: ", DUMP_PREFIX_ADDRESS,
+ 32, 8, ptep - 16, 32 * sizeof(*ptep), false);
+ }
+}
+
+void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds, bool skip_sync)
+{
+ int i;
+ av8l_fast_iopte *pmdp = pmds;
+
+ for (i = 0; i < ((SZ_1G * 4UL) >> AV8L_FAST_PAGE_SHIFT); ++i) {
+ if (!(*pmdp & AV8L_FAST_PTE_VALID)) {
+ *pmdp = 0;
+ if (!skip_sync)
+ dmac_clean_range(pmdp, pmdp + 1);
+ }
+ pmdp++;
+ }
+}
+#else
+static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
+{
+}
+#endif
+
/* caller must take care of cache maintenance on *ptep */
int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
int prot)
@@ -164,8 +211,10 @@ int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
pte |= AV8L_FAST_PTE_AP_RW;
paddr &= AV8L_FAST_PTE_ADDR_MASK;
- for (i = 0; i < nptes; i++, paddr += SZ_4K)
+ for (i = 0; i < nptes; i++, paddr += SZ_4K) {
+ __av8l_check_for_stale_tlb(ptep + i);
*(ptep + i) = pte | paddr;
+ }
return 0;
}
@@ -183,12 +232,21 @@ static int av8l_fast_map(struct io_pgtable_ops *ops, unsigned long iova,
return 0;
}
-/* caller must take care of cache maintenance on *ptep */
-void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size)
+static void __av8l_fast_unmap(av8l_fast_iopte *ptep, size_t size,
+ bool need_stale_tlb_tracking)
{
unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
+ int val = need_stale_tlb_tracking
+ ? AV8L_FAST_PTE_UNMAPPED_NEED_TLBI
+ : 0;
- memset(ptep, 0, sizeof(*ptep) * nptes);
+ memset(ptep, val, sizeof(*ptep) * nptes);
+}
+
+/* caller must take care of cache maintenance on *ptep */
+void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size)
+{
+ __av8l_fast_unmap(ptep, size, true);
}
/* upper layer must take care of TLB invalidation */
@@ -199,7 +257,7 @@ static size_t av8l_fast_unmap(struct io_pgtable_ops *ops, unsigned long iova,
av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, iova);
unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
- av8l_fast_unmap_public(ptep, size);
+ __av8l_fast_unmap(ptep, size, false);
dmac_clean_range(ptep, ptep + nptes);
return size;
@@ -539,6 +597,9 @@ static int __init av8l_fast_positive_testing(void)
failed++;
}
+ /* sweep up the TLB-proving sentinel PTEs */
+ av8l_fast_clear_stale_ptes(pmds, false);
+
/* map the entire 4GB VA space with 8K map calls */
for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_8K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_8K, IOMMU_READ))) {
@@ -557,6 +618,9 @@ static int __init av8l_fast_positive_testing(void)
failed++;
}
+ /* sweep up the TLB-proving sentinel PTEs */
+ av8l_fast_clear_stale_ptes(pmds, false);
+
/* map the entire 4GB VA space with 16K map calls */
for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_16K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_16K, IOMMU_READ))) {
@@ -575,6 +639,9 @@ static int __init av8l_fast_positive_testing(void)
failed++;
}
+ /* sweep up the TLB-proving sentinel PTEs */
+ av8l_fast_clear_stale_ptes(pmds, false);
+
/* map the entire 4GB VA space with 64K map calls */
for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_64K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_64K, IOMMU_READ))) {
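
Note that because __av8l_fast_unmap() marks stale entries with memset(), the
sentinel is a fill byte: a stale PTE reads back as 0x0a0a0a0a0a0a0a0a rather
than literally 0xa. That is fine, since (per the header comment below) all
that matters is that the word is non-zero with bit 0 clear. The lifecycle the
patch enforces can be modelled in isolation; a plain userspace sketch under
those assumptions, not kernel code:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define PTE_VALID       (1ULL << 0)     /* AV8L_FAST_PTE_VALID */
    #define PTE_STALE_FILL  0x0a            /* memset fill byte, bit 0 clear */

    typedef uint64_t iopte;

    int main(void)
    {
            iopte pt[4] = { 0 };
            int i;

            /* map: only legal over a vacant (all-zero) PTE */
            assert(pt[0] == 0);
            pt[0] = PTE_VALID | 0x80000000ULL;

            /* unmap: mark stale instead of zeroing; bit 0 stays clear, so
             * the walker sees the PTE as invalid but the sweeper can still
             * find it */
            memset(&pt[0], PTE_STALE_FILL, sizeof(pt[0]));
            assert(!(pt[0] & PTE_VALID) && pt[0] != 0);

            /* after TLBIALL: sweep every invalid PTE back to vacant */
            for (i = 0; i < 4; i++)
                    if (!(pt[i] & PTE_VALID))
                            pt[i] = 0;
            assert(pt[0] == 0);     /* mapping over it is legal again */
            return 0;
    }
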
diff --git a/include/linux/dma-mapping-fast.h b/include/linux/dma-mapping-fast.h
index ad82efca2153..aa9fcfe73162 100644
--- a/include/linux/dma-mapping-fast.h
+++ b/include/linux/dma-mapping-fast.h
@@ -33,6 +33,7 @@ struct dma_fast_smmu_mapping {
av8l_fast_iopte *pgtbl_pmds;
spinlock_t lock;
+ struct notifier_block notifier;
};
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
diff --git a/include/linux/io-pgtable-fast.h b/include/linux/io-pgtable-fast.h
index b482ffe8d20c..ab5a1dc6753e 100644
--- a/include/linux/io-pgtable-fast.h
+++ b/include/linux/io-pgtable-fast.h
@@ -13,6 +13,8 @@
#ifndef __LINUX_IO_PGTABLE_FAST_H
#define __LINUX_IO_PGTABLE_FAST_H
+#include <linux/notifier.h>
+
typedef u64 av8l_fast_iopte;
#define iopte_pmd_offset(pmds, iova) (pmds + (iova >> 12))
@@ -21,4 +23,35 @@ int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
int prot);
void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size);
+/* events for notifiers passed to av8l_register_notify */
+#define MAPPED_OVER_STALE_TLB 1
+
+#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+/*
+ * It doesn't matter what value we use as long as bit 0 is unset. The
+ * reason we need a distinct value at all is that certain hardware
+ * platforms have errata requiring that a PTE actually be zeroed out,
+ * not just have its valid bit unset.
+ */
+#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0xa
+
+void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds, bool skip_sync);
+void av8l_register_notify(struct notifier_block *nb);
+
+#else /* !CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
+
+#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0
+
+static inline void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds,
+ bool skip_sync)
+{
+}
+
+static inline void av8l_register_notify(struct notifier_block *nb)
+{
+}
+
+#endif /* CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
+
#endif /* __LINUX_IO_PGTABLE_FAST_H */
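
For completeness, here is a sketch of how another client could hook the same
event (hypothetical names; it simply mirrors fast_smmu_notify() from the
dma-mapping-fast.c hunk above):

    #include <linux/io-pgtable-fast.h>
    #include <linux/notifier.h>
    #include <linux/printk.h>

    static int my_stale_tlb_notify(struct notifier_block *nb,
                                   unsigned long action, void *data)
    {
            av8l_fast_iopte *ptep = data;   /* PTE about to be mapped over */

            if (action != MAPPED_OVER_STALE_TLB)
                    return NOTIFY_DONE;

            pr_err("mapped over stale TLB at pte %p = %#llx\n", ptep, *ptep);
            return NOTIFY_OK;
    }

    static struct notifier_block my_stale_tlb_nb = {
            .notifier_call = my_stale_tlb_notify,
    };

    /* at init time: av8l_register_notify(&my_stale_tlb_nb); */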