diff options
| -rw-r--r-- | drivers/iommu/Kconfig | 15 | ||||
| -rw-r--r-- | drivers/iommu/dma-mapping-fast.c | 44 | ||||
| -rw-r--r-- | drivers/iommu/io-pgtable-fast.c | 77 | ||||
| -rw-r--r-- | include/linux/dma-mapping-fast.h | 1 | ||||
| -rw-r--r-- | include/linux/io-pgtable-fast.h | 33 |
5 files changed, 163 insertions, 7 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index fff7785726ae..f64111886584 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -60,6 +60,21 @@ config IOMMU_IO_PGTABLE_FAST_SELFTEST
 
 	  If unsure, say N here.
 
+config IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+	bool "Prove correctness of TLB maintenance in the Fast DMA mapper"
+	depends on IOMMU_IO_PGTABLE_FAST
+	help
+	  Enables some debug features that help prove correctness of TLB
+	  maintenance routines in the Fast DMA mapper. This option will
+	  slow things down considerably, so should only be used in a debug
+	  configuration. This relies on the ability to set bits in an
+	  invalid page table entry, which is disallowed on some hardware
+	  due to errata. If you're running on such a platform then this
+	  option can only be used with unit tests. It will break real use
+	  cases.
+
+	  If unsure, say N here.
+
 endmenu
 
 config IOMMU_IOVA
diff --git a/drivers/iommu/dma-mapping-fast.c b/drivers/iommu/dma-mapping-fast.c
index 4ee6e925dc08..342c5cd24257 100644
--- a/drivers/iommu/dma-mapping-fast.c
+++ b/drivers/iommu/dma-mapping-fast.c
@@ -80,6 +80,7 @@ static bool __bit_covered_stale(unsigned long upcoming_stale,
 }
 
 static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
+					 struct dma_attrs *attrs,
 					 size_t size)
 {
 	unsigned long bit, prev_search_start, nbits = size >> FAST_PAGE_SHIFT;
@@ -114,8 +115,11 @@ static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
 	    __bit_covered_stale(mapping->upcoming_stale_bit,
 				prev_search_start,
 				bit + nbits - 1)) {
+		bool skip_sync = dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs);
+
 		iommu_tlbiall(mapping->domain);
 		mapping->have_stale_tlbs = false;
+		av8l_fast_clear_stale_ptes(mapping->pgtbl_pmds, skip_sync);
 	}
 
 	return (bit << FAST_PAGE_SHIFT) + mapping->base;
@@ -287,7 +291,7 @@ static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page,
 
 	spin_lock_irqsave(&mapping->lock, flags);
 
-	iova = __fast_smmu_alloc_iova(mapping, len);
+	iova = __fast_smmu_alloc_iova(mapping, attrs, len);
 
 	if (unlikely(iova == DMA_ERROR_CODE))
 		goto fail;
@@ -427,7 +431,7 @@ static void *fast_smmu_alloc(struct device *dev, size_t size,
 	}
 
 	spin_lock_irqsave(&mapping->lock, flags);
-	dma_addr = __fast_smmu_alloc_iova(mapping, size);
+	dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
 	if (dma_addr == DMA_ERROR_CODE) {
 		dev_err(dev, "no iova\n");
 		spin_unlock_irqrestore(&mapping->lock, flags);
@@ -519,6 +523,39 @@ static int fast_smmu_mapping_error(struct device *dev,
 	return dma_addr == DMA_ERROR_CODE;
 }
 
+static void __fast_smmu_mapped_over_stale(struct dma_fast_smmu_mapping *fast,
+					  void *data)
+{
+	av8l_fast_iopte *ptep = data;
+	dma_addr_t iova;
+	unsigned long bitmap_idx;
+
+	bitmap_idx = (unsigned long)(ptep - fast->pgtbl_pmds);
+	iova = bitmap_idx << FAST_PAGE_SHIFT;
+	dev_err(fast->dev, "Mapped over stale tlb at %pad\n", &iova);
+	dev_err(fast->dev, "bitmap (failure at idx %lu):\n", bitmap_idx);
+	dev_err(fast->dev, "ptep: %p pmds: %p diff: %lu\n", ptep,
+		fast->pgtbl_pmds, bitmap_idx);
+	print_hex_dump(KERN_ERR, "bmap: ", DUMP_PREFIX_ADDRESS,
+		       32, 8, fast->bitmap, fast->bitmap_size, false);
+}
+
+static int fast_smmu_notify(struct notifier_block *self,
+			    unsigned long action, void *data)
+{
+	struct dma_fast_smmu_mapping *fast = container_of(
+		self, struct dma_fast_smmu_mapping, notifier);
+
+	switch (action) {
+	case MAPPED_OVER_STALE_TLB:
+		__fast_smmu_mapped_over_stale(fast, data);
+		return NOTIFY_OK;
+	default:
+		WARN(1, "Unhandled notifier action");
+		return NOTIFY_DONE;
+	}
+}
+
 static const struct dma_map_ops fast_smmu_dma_ops = {
 	.alloc = fast_smmu_alloc,
 	.free = fast_smmu_free,
@@ -620,6 +657,9 @@ int fast_smmu_attach_device(struct device *dev,
 	}
 	mapping->fast->pgtbl_pmds = info.pmds;
 
+	mapping->fast->notifier.notifier_call = fast_smmu_notify;
+	av8l_register_notify(&mapping->fast->notifier);
+
 	dev->archdata.mapping = mapping;
 	set_dma_ops(dev, &fast_smmu_dma_ops);
 
diff --git a/drivers/iommu/io-pgtable-fast.c b/drivers/iommu/io-pgtable-fast.c
index 141b66ce207a..44307f67fd22 100644
--- a/drivers/iommu/io-pgtable-fast.c
+++ b/drivers/iommu/io-pgtable-fast.c
@@ -140,6 +140,53 @@ struct av8l_fast_io_pgtable {
 
 #define AV8L_FAST_PAGE_SHIFT		12
 
+#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+
+#include <asm/cacheflush.h>
+#include <linux/notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(av8l_notifier_list);
+
+void av8l_register_notify(struct notifier_block *nb)
+{
+	atomic_notifier_chain_register(&av8l_notifier_list, nb);
+}
+EXPORT_SYMBOL(av8l_register_notify);
+
+static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
+{
+	if (unlikely(*ptep)) {
+		atomic_notifier_call_chain(
+			&av8l_notifier_list, MAPPED_OVER_STALE_TLB,
+			(void *) ptep);
+		pr_err("Tried to map over a non-vacant pte: 0x%llx @ %p\n",
+		       *ptep, ptep);
+		pr_err("Nearby memory:\n");
+		print_hex_dump(KERN_ERR, "pgtbl: ", DUMP_PREFIX_ADDRESS,
+			       32, 8, ptep - 16, 32 * sizeof(*ptep), false);
+	}
+}
+
+void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds, bool skip_sync)
+{
+	int i;
+	av8l_fast_iopte *pmdp = pmds;
+
+	for (i = 0; i < ((SZ_1G * 4UL) >> AV8L_FAST_PAGE_SHIFT); ++i) {
+		if (!(*pmdp & AV8L_FAST_PTE_VALID)) {
+			*pmdp = 0;
+			if (!skip_sync)
+				dmac_clean_range(pmdp, pmdp + 1);
+		}
+		pmdp++;
+	}
+}
+#else
+static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
+{
+}
+#endif
+
 /* caller must take care of cache maintenance on *ptep */
 int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
 			 int prot)
@@ -164,8 +211,10 @@ int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
 		pte |= AV8L_FAST_PTE_AP_RW;
 
 	paddr &= AV8L_FAST_PTE_ADDR_MASK;
-	for (i = 0; i < nptes; i++, paddr += SZ_4K)
+	for (i = 0; i < nptes; i++, paddr += SZ_4K) {
+		__av8l_check_for_stale_tlb(ptep + i);
 		*(ptep + i) = pte | paddr;
+	}
 
 	return 0;
 }
@@ -183,12 +232,21 @@ static int av8l_fast_map(struct io_pgtable_ops *ops, unsigned long iova,
 	return 0;
 }
 
-/* caller must take care of cache maintenance on *ptep */
-void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size)
+static void __av8l_fast_unmap(av8l_fast_iopte *ptep, size_t size,
+			      bool need_stale_tlb_tracking)
 {
 	unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
+	int val = need_stale_tlb_tracking
+		? AV8L_FAST_PTE_UNMAPPED_NEED_TLBI
+		: 0;
 
-	memset(ptep, 0, sizeof(*ptep) * nptes);
+	memset(ptep, val, sizeof(*ptep) * nptes);
+}
+
+/* caller must take care of cache maintenance on *ptep */
+void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size)
+{
+	__av8l_fast_unmap(ptep, size, true);
 }
 
 /* upper layer must take care of TLB invalidation */
@@ -199,7 +257,7 @@ static size_t av8l_fast_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, iova);
 	unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
 
-	av8l_fast_unmap_public(ptep, size);
+	__av8l_fast_unmap(ptep, size, false);
 	dmac_clean_range(ptep, ptep + nptes);
 
 	return size;
@@ -539,6 +597,9 @@ static int __init av8l_fast_positive_testing(void)
 		failed++;
 	}
 
+	/* sweep up TLB proving PTEs */
+	av8l_fast_clear_stale_ptes(pmds, false);
+
 	/* map the entire 4GB VA space with 8K map calls */
 	for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_8K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_8K, IOMMU_READ))) {
@@ -557,6 +618,9 @@ static int __init av8l_fast_positive_testing(void)
 		failed++;
 	}
 
+	/* sweep up TLB proving PTEs */
+	av8l_fast_clear_stale_ptes(pmds, false);
+
 	/* map the entire 4GB VA space with 16K map calls */
 	for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_16K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_16K, IOMMU_READ))) {
@@ -575,6 +639,9 @@ static int __init av8l_fast_positive_testing(void)
 		failed++;
 	}
 
+	/* sweep up TLB proving PTEs */
+	av8l_fast_clear_stale_ptes(pmds, false);
+
 	/* map the entire 4GB VA space with 64K map calls */
 	for (iova = 0; iova < SZ_1G * 4UL; iova += SZ_64K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_64K, IOMMU_READ))) {
diff --git a/include/linux/dma-mapping-fast.h b/include/linux/dma-mapping-fast.h
index ad82efca2153..aa9fcfe73162 100644
--- a/include/linux/dma-mapping-fast.h
+++ b/include/linux/dma-mapping-fast.h
@@ -33,6 +33,7 @@ struct dma_fast_smmu_mapping {
 	av8l_fast_iopte	*pgtbl_pmds;
 
 	spinlock_t	lock;
+	struct notifier_block notifier;
 };
 
 #ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
diff --git a/include/linux/io-pgtable-fast.h b/include/linux/io-pgtable-fast.h
index b482ffe8d20c..ab5a1dc6753e 100644
--- a/include/linux/io-pgtable-fast.h
+++ b/include/linux/io-pgtable-fast.h
@@ -13,6 +13,8 @@
 #ifndef __LINUX_IO_PGTABLE_FAST_H
 #define __LINUX_IO_PGTABLE_FAST_H
 
+#include <linux/notifier.h>
+
 typedef u64 av8l_fast_iopte;
 
 #define iopte_pmd_offset(pmds, iova) (pmds + (iova >> 12))
@@ -21,4 +23,35 @@ int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
 			 int prot);
 void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size);
 
+/* events for notifiers passed to av8l_register_notify */
+#define MAPPED_OVER_STALE_TLB 1
+
+
+#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+/*
+ * Doesn't matter what we use as long as bit 0 is unset. The reason why we
+ * need a different value at all is that there are certain hardware
+ * platforms with erratum that require that a PTE actually be zero'd out
+ * and not just have its valid bit unset.
+ */
+#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0xa
+
+void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds, bool skip_sync);
+void av8l_register_notify(struct notifier_block *nb);
+
+#else  /* !CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
+
+#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0
+
+static inline void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds,
+					      bool skip_sync)
+{
+}
+
+static inline void av8l_register_notify(struct notifier_block *nb)
+{
+}
+
+#endif	/* CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
+
 #endif /* __LINUX_IO_PGTABLE_FAST_H */
