From 41bb38fc5398ae878c799647f3c4b25374029afb Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 15 Jun 2012 15:23:06 -0400 Subject: tile pci: enable IOMMU to support DMA for legacy devices This change uses the TRIO IOMMU to map the PCI DMA space and physical memory at different addresses. We also now use the dma_mapping_ops to provide support for non-PCI DMA, PCIe DMA (64-bit) and legacy PCI DMA (32-bit). We use the kernel's software I/O TLB framework (i.e. bounce buffers) for the legacy 32-bit PCI device support since there are a limited number of TLB entries in the IOMMU and it is non-trivial to handle indexing, searching, matching, etc. For 32-bit devices the performance impact of bounce buffers should not be a concern. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/Kbuild | 1 - arch/tile/include/asm/device.h | 33 ++++++++ arch/tile/include/asm/dma-mapping.h | 146 ++++++++++++++++++++++++------------ arch/tile/include/asm/pci.h | 76 ++++++++++++++++--- 4 files changed, 198 insertions(+), 58 deletions(-) create mode 100644 arch/tile/include/asm/device.h (limited to 'arch/tile/include') diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 143473e3a0bb..fb7c65ae8de0 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -9,7 +9,6 @@ header-y += hardwall.h generic-y += bug.h generic-y += bugs.h generic-y += cputime.h -generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h new file mode 100644 index 000000000000..5182705bd056 --- /dev/null +++ b/arch/tile/include/asm/device.h @@ -0,0 +1,33 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * Arch specific extensions to struct device + */ + +#ifndef _ASM_TILE_DEVICE_H +#define _ASM_TILE_DEVICE_H + +struct dev_archdata { + /* DMA operations on that device */ + struct dma_map_ops *dma_ops; + + /* Offset of the DMA address from the PA. */ + dma_addr_t dma_offset; + + /* Highest DMA address that can be generated by this device. */ + dma_addr_t max_direct_dma_addr; +}; + +struct pdev_archdata { +}; + +#endif /* _ASM_TILE_DEVICE_H */ diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index eaa06d175b39..4b6247d1a315 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -20,69 +20,80 @@ #include #include -/* - * Note that on x86 and powerpc, there is a "struct dma_mapping_ops" - * that is used for all the DMA operations. For now, we don't have an - * equivalent on tile, because we only have a single way of doing DMA. - * (Tilera bug 7994 to use dma_mapping_ops.) - */ +extern struct dma_map_ops *tile_dma_map_ops; +extern struct dma_map_ops *gx_pci_dma_map_ops; +extern struct dma_map_ops *gx_legacy_pci_dma_map_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dma_ops) + return dev->archdata.dma_ops; + else + return tile_dma_map_ops; +} + +static inline dma_addr_t get_dma_offset(struct device *dev) +{ + return dev->archdata.dma_offset; +} + +static inline void set_dma_offset(struct device *dev, dma_addr_t off) +{ + dev->archdata.dma_offset = off; +} -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction); -extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction); -extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction); -extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nhwentries, enum dma_data_direction); -extern dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction); -extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction); -extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction); -extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction); - - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - -extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t, - enum dma_data_direction); -extern void dma_sync_single_for_device(struct device *, dma_addr_t, - size_t, enum dma_data_direction); -extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t, - unsigned long offset, size_t, - enum dma_data_direction); -extern void dma_sync_single_range_for_device(struct device *, dma_addr_t, - unsigned long offset, size_t, - enum dma_data_direction); -extern void dma_cache_sync(struct device *dev, void *vaddr, size_t, - enum dma_data_direction); +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr + get_dma_offset(dev); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr - get_dma_offset(dev); +} + +static inline void dma_mark_clean(void *addr, size_t size) {} + +#include + +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + dev->archdata.dma_ops = ops; +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return 0; + return get_dma_ops(dev)->mapping_error(dev, dma_addr); } static inline int dma_supported(struct device *dev, u64 mask) { - return 1; + return get_dma_ops(dev)->dma_supported(dev, mask); } static inline int dma_set_mask(struct device *dev, u64 mask) { + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + /* Handle legacy PCI devices with limited memory addressability. */ + if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) { + set_dma_ops(dev, gx_legacy_pci_dma_map_ops); + set_dma_offset(dev, 0); + if (mask > dev->archdata.max_direct_dma_addr) + mask = dev->archdata.max_direct_dma_addr; + } + if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; @@ -91,4 +102,43 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + void *cpu_addr; + + cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs); + + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + + return cpu_addr; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + + dma_ops->free(dev, size, cpu_addr, dma_handle, attrs); +} + +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) +#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) + +/* + * dma_alloc_noncoherent() is #defined to return coherent memory, + * so there's no need to do any flushing here. + */ +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} + #endif /* _ASM_TILE_DMA_MAPPING_H */ diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index 2c224c47d8ab..553b7ff018c4 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -15,6 +15,7 @@ #ifndef _ASM_TILE_PCI_H #define _ASM_TILE_PCI_H +#include #include #include #include @@ -53,6 +54,16 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} #define TILE_NUM_PCIE 2 +/* + * The hypervisor maps the entirety of CPA-space as bus addresses, so + * bus addresses are physical addresses. The networking and block + * device layers use this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS 1 + +/* generic pci stuff */ +#include + #else #include @@ -85,7 +96,47 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} /* * Each Mem-Map interrupt region occupies 4KB. */ -#define MEM_MAP_INTR_REGION_SIZE (1<< TRIO_MAP_MEM_LIM__ADDR_SHIFT) +#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT) + +/* + * Allocate the PCI BAR window right below 4GB. + */ +#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32) + +/* + * Allocate 1GB for the PCI BAR window. + */ +#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30) + +/* + * This is the highest bus address targeting the host memory that + * can be generated by legacy PCI devices with 32-bit or less + * DMA capability, dictated by the BAR window size and location. + */ +#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \ + (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1) + +/* + * We shift the PCI bus range for all the physical memory up by the whole PA + * range. The corresponding CPA of an incoming PCI request will be the PCI + * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies + * that the 64-bit capable devices will be given DMA addresses as + * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit + * devices, we create a separate map region that handles the low + * 4GB. + */ +#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH()) + +/* + * End of the PCI memory resource. + */ +#define TILE_PCI_MEM_END \ + ((1ULL << CHIP_PA_WIDTH()) + TILE_PCI_BAR_WINDOW_TOP) + +/* + * Start of the PCI memory resource. + */ +#define TILE_PCI_MEM_START (TILE_PCI_MEM_END - TILE_PCI_BAR_WINDOW_SIZE) /* * Structure of a PCI controller (host bridge) on Gx. @@ -108,6 +159,8 @@ struct pci_controller { int index; /* PCI domain number */ struct pci_bus *root_bus; + uint64_t mem_offset; /* cpu->bus memory mapping offset. */ + int last_busno; struct pci_ops *ops; @@ -126,14 +179,22 @@ extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO]; extern void pci_iounmap(struct pci_dev *dev, void __iomem *); -#endif /* __tilegx__ */ +extern void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res); + +extern void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); /* - * The hypervisor maps the entirety of CPA-space as bus addresses, so - * bus addresses are physical addresses. The networking and block - * device layers use this boolean for bounce buffer decisions. + * The PCI address space does not equal the physical memory address + * space (we have an IOMMU). The IDE and SCSI device layers use this + * boolean for bounce buffer decisions. */ -#define PCI_DMA_BUS_IS_PHYS 1 +#define PCI_DMA_BUS_IS_PHYS 0 + +#endif /* __tilegx__ */ int __init tile_pci_init(void); int __init pcibios_init(void); @@ -169,7 +230,4 @@ static inline int pcibios_assign_all_busses(void) /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include -/* generic pci stuff */ -#include - #endif /* _ASM_TILE_PCI_H */ -- cgit v1.2.3