// SPDX-License-Identifier: GPL-2.0 /* * Implementation of Gasket page table support. * * Copyright (C) 2018 Google, Inc. */ /* * Implementation of Gasket page table support. * * This file assumes 4kB pages throughout; can be factored out when necessary. * * There is a configurable number of page table entries, as well as a * configurable bit index for the extended address flag. Both of these are * specified in gasket_page_table_init through the page_table_config parameter. * * The following example assumes: * page_table_config->total_entries = 8192 * page_table_config->extended_bit = 63 * * Address format: * Simple addresses - those whose containing pages are directly placed in the * device's address translation registers - are laid out as: * [ 63 - 25: 0 | 24 - 12: page index | 11 - 0: page offset ] * page index: The index of the containing page in the device's address * translation registers. * page offset: The index of the address into the containing page. * * Extended address - those whose containing pages are contained in a second- * level page table whose address is present in the device's address translation * registers - are laid out as: * [ 63: flag | 62 - 34: 0 | 33 - 21: dev/level 0 index | * 20 - 12: host/level 1 index | 11 - 0: page offset ] * flag: Marker indicating that this is an extended address. Always 1. * dev index: The index of the first-level page in the device's extended * address translation registers. * host index: The index of the containing page in the [host-resident] second- * level page table. * page offset: The index of the address into the containing [second-level] * page. */ #include "gasket_page_table.h" #include #include #include #include #include #include #include #include #include "gasket_constants.h" #include "gasket_core.h" /* Constants & utility macros */ /* The number of pages that can be mapped into each second-level page table. 
*/ #define GASKET_PAGES_PER_SUBTABLE 512 /* The starting position of the page index in a simple virtual address. */ #define GASKET_SIMPLE_PAGE_SHIFT 12 /* Flag indicating that a [device] slot is valid for use. */ #define GASKET_VALID_SLOT_FLAG 1 /* * The starting position of the level 0 page index (i.e., the entry in the * device's extended address registers) in an extended address. * Also can be thought of as (log2(PAGE_SIZE) + log2(PAGES_PER_SUBTABLE)), * or (12 + 9). */ #define GASKET_EXTENDED_LVL0_SHIFT 21 /* * Number of first level pages that Gasket chips support. Equivalent to * log2(NUM_LVL0_PAGE_TABLES) * * At a maximum, allowing for a 34 bits address space (or 16GB) * = GASKET_EXTENDED_LVL0_WIDTH + (log2(PAGE_SIZE) + log2(PAGES_PER_SUBTABLE) * or, = 13 + 9 + 12 */ #define GASKET_EXTENDED_LVL0_WIDTH 13 /* * The starting position of the level 1 page index (i.e., the entry in the * host second-level/sub- table) in an extended address. */ #define GASKET_EXTENDED_LVL1_SHIFT 12 /* Type declarations */ /* Valid states for a struct gasket_page_table_entry. */ enum pte_status { PTE_FREE, PTE_INUSE, }; /* * Mapping metadata for a single page. * * In this file, host-side page table entries are referred to as that (or PTEs). * Where device vs. host entries are differentiated, device-side or -visible * entries are called "slots". A slot may be either an entry in the device's * address translation table registers or an entry in a second-level page * table ("subtable"). * * The full data in this structure is visible on the host [of course]. Only * the address contained in dma_addr is communicated to the device; that points * to the actual page mapped and described by this structure. */ struct gasket_page_table_entry { /* The status of this entry/slot: free or in use. */ enum pte_status status; /* * Index for alignment into host vaddrs. * When a user specifies a host address for a mapping, that address may * not be page-aligned. 
Offset is the index into the containing page of * the host address (i.e., host_vaddr & (PAGE_SIZE - 1)). * This is necessary for translating between user-specified addresses * and page-aligned addresses. */ int offset; /* Address of the page in DMA space. */ dma_addr_t dma_addr; /* Linux page descriptor for the page described by this structure. */ struct page *page; /* * If this is an extended and first-level entry, sublevel points * to the second-level entries underneath this entry. */ struct gasket_page_table_entry *sublevel; }; /* * Maintains virtual to physical address mapping for a coherent page that is * allocated by this module for a given device. * Note that coherent pages mappings virt mapping cannot be tracked by the * Linux kernel, and coherent pages don't have a struct page associated, * hence Linux kernel cannot perform a get_user_page_xx() on a phys address * that was allocated coherent. * This structure trivially implements this mechanism. */ struct gasket_coherent_page_entry { /* Phys address, dma'able by the owner device */ dma_addr_t paddr; /* Kernel virtual address */ u64 user_virt; /* User virtual address that was mapped by the mmap kernel subsystem */ u64 kernel_virt; /* * Whether this page has been mapped into a user land process virtual * space */ u32 in_use; }; /* * [Host-side] page table descriptor. * * This structure tracks the metadata necessary to manage both simple and * extended page tables. */ struct gasket_page_table { /* The config used to create this page table. */ struct gasket_page_table_config config; /* The number of simple (single-level) entries in the page table. */ uint num_simple_entries; /* The number of extended (two-level) entries in the page table. */ uint num_extended_entries; /* Array of [host-side] page table entries. */ struct gasket_page_table_entry *entries; /* Number of actively mapped kernel pages in this table. */ uint num_active_pages; /* Device register: base of/first slot in the page table. 
*/ u64 __iomem *base_slot; /* Device register: holds the offset indicating the start of the * extended address region of the device's address translation table. */ u64 __iomem *extended_offset_reg; /* Device structure for the underlying device. Only used for logging. */ struct device *device; /* PCI system descriptor for the underlying device. */ struct pci_dev *pci_dev; /* Location of the extended address bit for this Gasket device. */ u64 extended_flag; /* Mutex to protect page table internals. */ struct mutex mutex; /* Number of coherent pages accessible thru by this page table */ int num_coherent_pages; /* * List of coherent memory (physical) allocated for a device. * * This structure also remembers the user virtual mapping, this is * hacky, but we need to do this because the kernel doesn't keep track * of the user coherent pages (pfn pages), and virt to coherent page * mapping. * TODO: use find_vma() APIs to convert host address to vm_area, to * dma_addr_t instead of storing user virtu address in * gasket_coherent_page_entry * * Note that the user virtual mapping is created by the driver, in * gasket_mmap function, so user_virt belongs in the driver anyhow. */ struct gasket_coherent_page_entry *coherent_pages; }; /* See gasket_page_table.h for description. */ int gasket_page_table_init(struct gasket_page_table **ppg_tbl, const struct gasket_bar_data *bar_data, const struct gasket_page_table_config *page_table_config, struct device *device, struct pci_dev *pci_dev) { ulong bytes; struct gasket_page_table *pg_tbl; ulong total_entries = page_table_config->total_entries; /* * TODO: Verify config->total_entries against value read from the * hardware register that contains the page table size. */ if (total_entries == ULONG_MAX) { dev_dbg(device, "Error reading page table size. 
" "Initializing page table with size 0\n"); total_entries = 0; } dev_dbg(device, "Attempting to initialize page table of size 0x%lx\n", total_entries); dev_dbg(device, "Table has base reg 0x%x, extended offset reg 0x%x\n", page_table_config->base_reg, page_table_config->extended_reg); *ppg_tbl = kzalloc(sizeof(**ppg_tbl), GFP_KERNEL); if (!*ppg_tbl) { dev_dbg(device, "No memory for page table\n"); return -ENOMEM; } pg_tbl = *ppg_tbl; bytes = total_entries * sizeof(struct gasket_page_table_entry); if (bytes != 0) { pg_tbl->entries = vzalloc(bytes); if (!pg_tbl->entries) { dev_dbg(device, "No memory for address translation metadata\n"); kfree(pg_tbl); *ppg_tbl = NULL; return -ENOMEM; } } mutex_init(&pg_tbl->mutex); memcpy(&pg_tbl->config, page_table_config, sizeof(*page_table_config)); if (pg_tbl->config.mode == GASKET_PAGE_TABLE_MODE_NORMAL || pg_tbl->config.mode == GASKET_PAGE_TABLE_MODE_SIMPLE) { pg_tbl->num_simple_entries = total_entries; pg_tbl->num_extended_entries = 0; pg_tbl->extended_flag = 1ull << page_table_config->extended_bit; } else { pg_tbl->num_simple_entries = 0; pg_tbl->num_extended_entries = total_entries; pg_tbl->extended_flag = 0; } pg_tbl->num_active_pages = 0; pg_tbl->base_slot = (u64 __iomem *)&bar_data->virt_base[page_table_config->base_reg]; pg_tbl->extended_offset_reg = (u64 __iomem *)&bar_data->virt_base[page_table_config->extended_reg]; pg_tbl->device = get_device(device); pg_tbl->pci_dev = pci_dev; dev_dbg(device, "Page table initialized successfully\n"); return 0; } /* * Check if a range of PTEs is free. * The page table mutex must be held by the caller. */ static bool gasket_is_pte_range_free(struct gasket_page_table_entry *ptes, uint num_entries) { int i; for (i = 0; i < num_entries; i++) { if (ptes[i].status != PTE_FREE) return false; } return true; } /* * Free a second level page [sub]table. * The page table mutex must be held before this call. 
*/ static void gasket_free_extended_subtable(struct gasket_page_table *pg_tbl, struct gasket_page_table_entry *pte, u64 __iomem *slot) { /* Release the page table from the driver */ pte->status = PTE_FREE; /* Release the page table from the device */ writeq(0, slot); if (pte->dma_addr) dma_unmap_page(pg_tbl->device, pte->dma_addr, PAGE_SIZE, DMA_TO_DEVICE); vfree(pte->sublevel); if (pte->page) free_page((ulong)page_address(pte->page)); memset(pte, 0, sizeof(struct gasket_page_table_entry)); } /* * Actually perform collection. * The page table mutex must be held by the caller. */ static void gasket_page_table_garbage_collect_nolock(struct gasket_page_table *pg_tbl) { struct gasket_page_table_entry *pte; u64 __iomem *slot; /* XXX FIX ME XXX -- more efficient to keep a usage count */ /* rather than scanning the second level page tables */ for (pte = pg_tbl->entries + pg_tbl->num_simple_entries, slot = pg_tbl->base_slot + pg_tbl->num_simple_entries; pte < pg_tbl->entries + pg_tbl->config.total_entries; pte++, slot++) { if (pte->status == PTE_INUSE) { if (gasket_is_pte_range_free(pte->sublevel, GASKET_PAGES_PER_SUBTABLE)) gasket_free_extended_subtable(pg_tbl, pte, slot); } } } /* See gasket_page_table.h for description. */ void gasket_page_table_garbage_collect(struct gasket_page_table *pg_tbl) { mutex_lock(&pg_tbl->mutex); gasket_page_table_garbage_collect_nolock(pg_tbl); mutex_unlock(&pg_tbl->mutex); } /* See gasket_page_table.h for description. */ void gasket_page_table_cleanup(struct gasket_page_table *pg_tbl) { /* Deallocate free second-level tables. */ gasket_page_table_garbage_collect(pg_tbl); /* TODO: Check that all PTEs have been freed? */ vfree(pg_tbl->entries); pg_tbl->entries = NULL; put_device(pg_tbl->device); kfree(pg_tbl); } /* See gasket_page_table.h for description. 
*/ int gasket_page_table_partition(struct gasket_page_table *pg_tbl, uint num_simple_entries) { int i, start; mutex_lock(&pg_tbl->mutex); if (num_simple_entries > pg_tbl->config.total_entries) { mutex_unlock(&pg_tbl->mutex); return -EINVAL; } gasket_page_table_garbage_collect_nolock(pg_tbl); start = min(pg_tbl->num_simple_entries, num_simple_entries); for (i = start; i < pg_tbl->config.total_entries; i++) { if (pg_tbl->entries[i].status != PTE_FREE) { dev_err(pg_tbl->device, "entry %d is not free\n", i); mutex_unlock(&pg_tbl->mutex); return -EBUSY; } } pg_tbl->num_simple_entries = num_simple_entries; pg_tbl->num_extended_entries = pg_tbl->config.total_entries - num_simple_entries; writeq(num_simple_entries, pg_tbl->extended_offset_reg); mutex_unlock(&pg_tbl->mutex); return 0; } EXPORT_SYMBOL(gasket_page_table_partition); /* * Return whether a host buffer was mapped as coherent memory. * * A Gasket page_table currently support one contiguous dma range, mapped to one * contiguous virtual memory range. Check if the host_addr is within that range. */ static int is_coherent(struct gasket_page_table *pg_tbl, ulong host_addr) { u64 min, max; /* whether the host address is within user virt range */ if (!pg_tbl->coherent_pages) return 0; min = (u64)pg_tbl->coherent_pages[0].user_virt; max = min + PAGE_SIZE * pg_tbl->num_coherent_pages; return min <= host_addr && host_addr < max; } /* Safely return a page to the OS. */ static bool gasket_release_page(struct page *page) { if (!page) return false; if (!PageReserved(page)) SetPageDirty(page); put_page(page); return true; } /* * Get and map last level page table buffers. * * slots is the location(s) to write device-mapped page address. If this is a * simple mapping, these will be address translation registers. If this is * an extended mapping, these will be within a second-level page table * allocated by the host and so must have their __iomem attribute casted away. 
*/
/*
 * pg_tbl: page table being updated (num_active_pages is adjusted here).
 * ptes: host-side entries to fill, one per page.
 * slots: device-visible slots to fill, one per page.
 * host_addr: host address of the first page to map.
 * num_pages: number of consecutive pages to map.
 * is_simple_mapping: nonzero to write slots with writeq(); zero to store
 *                    into them as ordinary host memory.
 *
 * Returns 0 on success. On failure returns the negative value from
 * get_user_pages_fast() (or -ENOMEM if it returned 0), or -EINVAL if the DMA
 * mapping failed. Pages mapped before the failing one are NOT undone here;
 * the function returns immediately.
 */
static int gasket_perform_mapping(struct gasket_page_table *pg_tbl,
				  struct gasket_page_table_entry *ptes,
				  u64 __iomem *slots, ulong host_addr,
				  uint num_pages, int is_simple_mapping)
{
	int ret;
	ulong offset;
	struct page *page;
	dma_addr_t dma_addr;
	ulong page_addr;
	int i;

	for (i = 0; i < num_pages; i++) {
		/* Host address of page i and its offset within that page. */
		page_addr = host_addr + i * PAGE_SIZE;
		offset = page_addr & (PAGE_SIZE - 1);
		if (is_coherent(pg_tbl, host_addr)) {
			/*
			 * Coherent buffer: no struct page is recorded; the DMA
			 * address is computed from the distance between
			 * host_addr and the start of the coherent user range.
			 */
			u64 off =
				(u64)host_addr -
				(u64)pg_tbl->coherent_pages[0].user_virt;
			ptes[i].page = NULL;
			ptes[i].offset = offset;
			ptes[i].dma_addr = pg_tbl->coherent_pages[0].paddr +
					   off + i * PAGE_SIZE;
		} else {
			/* Pin the single user page containing page_addr. */
			ret = get_user_pages_fast(page_addr - offset, 1, 1,
						  &page);

			if (ret <= 0) {
				dev_err(pg_tbl->device,
					"get user pages failed for addr=0x%lx, "
					"offset=0x%lx [ret=%d]\n",
					page_addr, offset, ret);
				/* A return of 0 is reported as -ENOMEM. */
				return ret ? ret : -ENOMEM;
			}
			++pg_tbl->num_active_pages;

			ptes[i].page = page;
			ptes[i].offset = offset;

			/* Map the page into DMA space. */
			ptes[i].dma_addr =
				dma_map_page(pg_tbl->device, page, 0, PAGE_SIZE,
					     DMA_BIDIRECTIONAL);

			if (dma_mapping_error(pg_tbl->device,
					      ptes[i].dma_addr)) {
				/* Unpin the page and clear this entry again. */
				if (gasket_release_page(ptes[i].page))
					--pg_tbl->num_active_pages;

				memset(&ptes[i], 0,
				       sizeof(struct gasket_page_table_entry));
				return -EINVAL;
			}
		}

		/* Make the DMA-space address available to the device. */
		dma_addr = (ptes[i].dma_addr + offset) | GASKET_VALID_SLOT_FLAG;

		if (is_simple_mapping) {
			writeq(dma_addr, &slots[i]);
		} else {
			((u64 __force *)slots)[i] = dma_addr;
			/* Extended page table vectors are in DRAM,
			 * and so need to be synced each time they are updated.
			 */
			/*
			 * NOTE(review): the handle returned by
			 * dma_map_single() is discarded here, so this mapping
			 * is never checked or unmapped in this function.
			 */
			dma_map_single(pg_tbl->device,
				       (void *)&((u64 __force *)slots)[i],
				       sizeof(u64), DMA_TO_DEVICE);
		}
		/* Only now does the entry count as in use. */
		ptes[i].status = PTE_INUSE;
	}
	return 0;
}

/*
 * Return the index of the page for the address in the simple table.
 * Does not perform validity checking.
*/ static int gasket_simple_page_idx(struct gasket_page_table *pg_tbl, ulong dev_addr) { return (dev_addr >> GASKET_SIMPLE_PAGE_SHIFT) & (pg_tbl->config.total_entries - 1); } /* * Return the level 0 page index for the given address. * Does not perform validity checking. */ static ulong gasket_extended_lvl0_page_idx(struct gasket_page_table *pg_tbl, ulong dev_addr) { return (dev_addr >> GASKET_EXTENDED_LVL0_SHIFT) & (pg_tbl->config.total_entries - 1); } /* * Return the level 1 page index for the given address. * Does not perform validity checking. */ static ulong gasket_extended_lvl1_page_idx(struct gasket_page_table *pg_tbl, ulong dev_addr) { return (dev_addr >> GASKET_EXTENDED_LVL1_SHIFT) & (GASKET_PAGES_PER_SUBTABLE - 1); } /* * Allocate page table entries in a simple table. * The page table mutex must be held by the caller. */ static int gasket_alloc_simple_entries(struct gasket_page_table *pg_tbl, ulong dev_addr, uint num_pages) { if (!gasket_is_pte_range_free(pg_tbl->entries + gasket_simple_page_idx(pg_tbl, dev_addr), num_pages)) return -EBUSY; return 0; } /* * Unmap and release mapped pages. * The page table mutex must be held by the caller. */ static void gasket_perform_unmapping(struct gasket_page_table *pg_tbl, struct gasket_page_table_entry *ptes, u64 __iomem *slots, uint num_pages, int is_simple_mapping) { int i; /* * For each page table entry and corresponding entry in the device's * address translation table: */ for (i = 0; i < num_pages; i++) { /* release the address from the device, */ if (is_simple_mapping || ptes[i].status == PTE_INUSE) { writeq(0, &slots[i]); } else { ((u64 __force *)slots)[i] = 0; /* sync above PTE update before updating mappings */ wmb(); } /* release the address from the driver, */ if (ptes[i].status == PTE_INUSE) { if (ptes[i].page && ptes[i].dma_addr) { dma_unmap_page(pg_tbl->device, ptes[i].dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } if (gasket_release_page(ptes[i].page)) --pg_tbl->num_active_pages; } /* and clear the PTE. 
*/ memset(&ptes[i], 0, sizeof(struct gasket_page_table_entry)); } } /* * Unmap and release pages mapped to simple addresses. * The page table mutex must be held by the caller. */ static void gasket_unmap_simple_pages(struct gasket_page_table *pg_tbl, ulong dev_addr, uint num_pages) { uint slot = gasket_simple_page_idx(pg_tbl, dev_addr); gasket_perform_unmapping(pg_tbl, pg_tbl->entries + slot, pg_tbl->base_slot + slot, num_pages, 1); } /* * Unmap and release buffers to extended addresses. * The page table mutex must be held by the caller. */ static void gasket_unmap_extended_pages(struct gasket_page_table *pg_tbl, ulong dev_addr, uint num_pages) { uint slot_idx, remain, len; struct gasket_page_table_entry *pte; u64 __iomem *slot_base; remain = num_pages; slot_idx = gasket_extended_lvl1_page_idx(pg_tbl, dev_addr); pte = pg_tbl->entries + pg_tbl->num_simple_entries + gasket_extended_lvl0_page_idx(pg_tbl, dev_addr); while (remain > 0) { /* TODO: Add check to ensure pte remains valid? */ len = min(remain, GASKET_PAGES_PER_SUBTABLE - slot_idx); if (pte->status == PTE_INUSE) { slot_base = (u64 __iomem *)(page_address(pte->page) + pte->offset); gasket_perform_unmapping(pg_tbl, pte->sublevel + slot_idx, slot_base + slot_idx, len, 0); } remain -= len; slot_idx = 0; pte++; } } /* Evaluates to nonzero if the specified virtual address is simple. */ static inline bool gasket_addr_is_simple(struct gasket_page_table *pg_tbl, ulong addr) { return !((addr) & (pg_tbl)->extended_flag); } /* * Convert (simple, page, offset) into a device address. 
* Examples: * Simple page 0, offset 32: * Input (1, 0, 32), Output 0x20 * Simple page 1000, offset 511: * Input (1, 1000, 511), Output 0x3E81FF * Extended page 0, offset 32: * Input (0, 0, 32), Output 0x8000000020 * Extended page 1000, offset 511: * Input (0, 1000, 511), Output 0x8003E81FF */ static ulong gasket_components_to_dev_address(struct gasket_page_table *pg_tbl, int is_simple, uint page_index, uint offset) { ulong dev_addr = (page_index << GASKET_SIMPLE_PAGE_SHIFT) | offset; return is_simple ? dev_addr : (pg_tbl->extended_flag | dev_addr); } /* * Validity checking for simple addresses. * * Verify that address translation commutes (from address to/from page + offset) * and that the requested page range starts and ends within the set of * currently-partitioned simple pages. */ static bool gasket_is_simple_dev_addr_bad(struct gasket_page_table *pg_tbl, ulong dev_addr, uint num_pages) { ulong page_offset = dev_addr & (PAGE_SIZE - 1); ulong page_index = (dev_addr / PAGE_SIZE) & (pg_tbl->config.total_entries - 1); if (gasket_components_to_dev_address(pg_tbl, 1, page_index, page_offset) != dev_addr) { dev_err(pg_tbl->device, "address is invalid, 0x%lX\n", dev_addr); return true; } if (page_index >= pg_tbl->num_simple_entries) { dev_err(pg_tbl->device, "starting slot at %lu is too large, max is < %u\n", page_index, pg_tbl->num_simple_entries); return true; } if (page_index + num_pages > pg_tbl->num_simple_entries) { dev_err(pg_tbl->device, "ending slot at %lu is too large, max is <= %u\n", page_index + num_pages, pg_tbl->num_simple_entries); return true; } return false; } /* * Validity checking for extended addresses. * * Verify that address translation commutes (from address to/from page + * offset) and that the requested page range starts and ends within the set of * currently-partitioned extended pages. 
*/
static bool gasket_is_extended_dev_addr_bad(struct gasket_page_table *pg_tbl,
					    ulong dev_addr, uint num_pages)
{
	/* Starting byte index of dev_addr into the first mapped page */
	ulong page_offset = dev_addr & (PAGE_SIZE - 1);
	ulong page_global_idx, page_lvl0_idx, page_lvl1_idx;
	ulong num_lvl0_pages;
	ulong addr;

	/* check if the device address is out of bound */
	addr = dev_addr & ~((pg_tbl)->extended_flag);
	if (addr >> (GASKET_EXTENDED_LVL0_WIDTH + GASKET_EXTENDED_LVL0_SHIFT)) {
		dev_err(pg_tbl->device, "device address out of bounds: 0x%lx\n",
			dev_addr);
		return true;
	}

	/* Find the starting sub-page index in the space of all sub-pages. */
	page_global_idx = (dev_addr / PAGE_SIZE) &
		(pg_tbl->config.total_entries * GASKET_PAGES_PER_SUBTABLE - 1);

	/* Find the starting level 0 index. */
	page_lvl0_idx = gasket_extended_lvl0_page_idx(pg_tbl, dev_addr);

	/* Find the starting slot within the first level 1 subtable. */
	page_lvl1_idx = gasket_extended_lvl1_page_idx(pg_tbl, dev_addr);

	/*
	 * Get the count of affected level 0 pages. The range may begin in the
	 * middle of a subtable, so the starting level 1 index has to be
	 * counted too: e.g. 2 pages starting at slot 511 touch two level 0
	 * entries, not one. Ignoring it would let a range spill past the last
	 * extended entry.
	 */
	num_lvl0_pages = DIV_ROUND_UP(page_lvl1_idx + num_pages,
				      GASKET_PAGES_PER_SUBTABLE);

	/* Rebuilding the address must give back exactly what was passed in. */
	if (gasket_components_to_dev_address(pg_tbl, 0, page_global_idx,
					     page_offset) != dev_addr) {
		dev_err(pg_tbl->device, "address is invalid: 0x%lx\n",
			dev_addr);
		return true;
	}

	if (page_lvl0_idx >= pg_tbl->num_extended_entries) {
		dev_err(pg_tbl->device,
			"starting level 0 slot at %lu is too large, max is < "
			"%u\n", page_lvl0_idx, pg_tbl->num_extended_entries);
		return true;
	}

	if (page_lvl0_idx + num_lvl0_pages > pg_tbl->num_extended_entries) {
		dev_err(pg_tbl->device,
			"ending level 0 slot at %lu is too large, max is <= %u\n",
			page_lvl0_idx + num_lvl0_pages,
			pg_tbl->num_extended_entries);
		return true;
	}

	return false;
}

/*
 * Non-locking entry to unmapping routines.
 * The page table mutex must be held by the caller.
*/ static void gasket_page_table_unmap_nolock(struct gasket_page_table *pg_tbl, ulong dev_addr, uint num_pages) { if (!num_pages) return; if (gasket_addr_is_simple(pg_tbl, dev_addr)) gasket_unmap_simple_pages(pg_tbl, dev_addr, num_pages); else gasket_unmap_extended_pages(pg_tbl, dev_addr, num_pages); } /* * Allocate and map pages to simple addresses. * If there is an error, no pages are mapped. */ static int gasket_map_simple_pages(struct gasket_page_table *pg_tbl, ulong host_addr, ulong dev_addr, uint num_pages) { int ret; uint slot_idx = gasket_simple_page_idx(pg_tbl, dev_addr); ret = gasket_alloc_simple_entries(pg_tbl, dev_addr, num_pages); if (ret) { dev_err(pg_tbl->device, "page table slots %u (@ 0x%lx) to %u are not available\n", slot_idx, dev_addr, slot_idx + num_pages - 1); return ret; } ret = gasket_perform_mapping(pg_tbl, pg_tbl->entries + slot_idx, pg_tbl->base_slot + slot_idx, host_addr, num_pages, 1); if (ret) { gasket_page_table_unmap_nolock(pg_tbl, dev_addr, num_pages); dev_err(pg_tbl->device, "gasket_perform_mapping %d\n", ret); } return ret; } /* * Allocate a second level page table. * The page table mutex must be held by the caller. */ static int gasket_alloc_extended_subtable(struct gasket_page_table *pg_tbl, struct gasket_page_table_entry *pte, u64 __iomem *slot) { ulong page_addr, subtable_bytes; dma_addr_t dma_addr; /* XXX FIX ME XXX this is inefficient for non-4K page sizes */ /* GFP_DMA flag must be passed to architectures for which * part of the memory range is not considered DMA'able. 
* This seems to be the case for Juno board with 4.5.0 Linaro kernel */ page_addr = get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!page_addr) return -ENOMEM; pte->page = virt_to_page((void *)page_addr); pte->offset = 0; subtable_bytes = sizeof(struct gasket_page_table_entry) * GASKET_PAGES_PER_SUBTABLE; pte->sublevel = vzalloc(subtable_bytes); if (!pte->sublevel) { free_page(page_addr); memset(pte, 0, sizeof(struct gasket_page_table_entry)); return -ENOMEM; } /* Map the page into DMA space. */ pte->dma_addr = dma_map_page(pg_tbl->device, pte->page, 0, PAGE_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(pg_tbl->device, pte->dma_addr)) { free_page(page_addr); vfree(pte->sublevel); memset(pte, 0, sizeof(struct gasket_page_table_entry)); return -ENOMEM; } /* make the addresses available to the device */ dma_addr = (pte->dma_addr + pte->offset) | GASKET_VALID_SLOT_FLAG; writeq(dma_addr, slot); pte->status = PTE_INUSE; return 0; } /* * Allocate slots in an extended page table. Check to see if a range of page * table slots are available. If necessary, memory is allocated for second level * page tables. * * Note that memory for second level page tables is allocated as needed, but * that memory is only freed on the final close of the device file, when the * page tables are repartitioned, or the the device is removed. If there is an * error or if the full range of slots is not available, any memory * allocated for second level page tables remains allocated until final close, * repartition, or device removal. * * The page table mutex must be held by the caller. 
*/
static int gasket_alloc_extended_entries(struct gasket_page_table *pg_tbl,
					 ulong dev_addr, uint num_entries)
{
	uint left, first_slot, chunk;
	struct gasket_page_table_entry *lvl0;
	u64 __iomem *lvl0_slot;
	int ret;

	first_slot = gasket_extended_lvl1_page_idx(pg_tbl, dev_addr);

	/* Host entry and device slot of the first affected level 0 page. */
	lvl0 = pg_tbl->entries + pg_tbl->num_simple_entries +
	       gasket_extended_lvl0_page_idx(pg_tbl, dev_addr);
	lvl0_slot = pg_tbl->base_slot + pg_tbl->num_simple_entries +
		    gasket_extended_lvl0_page_idx(pg_tbl, dev_addr);

	/*
	 * One level 0 entry per iteration; only the first subtable may be
	 * entered at a non-zero slot.
	 */
	for (left = num_entries; left > 0;
	     left -= chunk, first_slot = 0, lvl0++, lvl0_slot++) {
		chunk = min(left, GASKET_PAGES_PER_SUBTABLE - first_slot);

		if (lvl0->status == PTE_FREE) {
			/* No subtable yet: create one for this entry. */
			ret = gasket_alloc_extended_subtable(pg_tbl, lvl0,
							     lvl0_slot);
			if (ret) {
				dev_err(pg_tbl->device,
					"no memory for extended addr subtable\n");
				return ret;
			}
		} else if (!gasket_is_pte_range_free(lvl0->sublevel +
						     first_slot, chunk)) {
			/* Existing subtable, but the slots are occupied. */
			return -EBUSY;
		}
	}

	return 0;
}

/*
 * gasket_map_extended_pages - Get and map buffers to extended addresses.
 * If there is an error, no pages are mapped.
*/
static int gasket_map_extended_pages(struct gasket_page_table *pg_tbl,
				     ulong host_addr, ulong dev_addr,
				     uint num_pages)
{
	int ret;
	ulong dev_addr_end;
	uint slot_idx, remain, len;
	struct gasket_page_table_entry *pte;
	u64 __iomem *slot_base;

	/* Make sure every target slot is free, creating subtables as needed. */
	ret = gasket_alloc_extended_entries(pg_tbl, dev_addr, num_pages);
	if (ret) {
		/*
		 * Last byte of the requested device range: the length is
		 * num_pages pages, i.e. num_pages * PAGE_SIZE bytes.
		 */
		dev_addr_end = dev_addr + (ulong)num_pages * PAGE_SIZE - 1;
		/*
		 * Arguments follow the format string: (lvl0,lvl1) of the
		 * start, the start address, then (lvl0,lvl1) of the end.
		 */
		dev_err(pg_tbl->device,
			"page table slots (%lu,%lu) (@ 0x%lx) to (%lu,%lu) are "
			"not available\n",
			gasket_extended_lvl0_page_idx(pg_tbl, dev_addr),
			gasket_extended_lvl1_page_idx(pg_tbl, dev_addr),
			dev_addr,
			gasket_extended_lvl0_page_idx(pg_tbl, dev_addr_end),
			gasket_extended_lvl1_page_idx(pg_tbl, dev_addr_end));
		return ret;
	}

	remain = num_pages;
	slot_idx = gasket_extended_lvl1_page_idx(pg_tbl, dev_addr);
	pte = pg_tbl->entries + pg_tbl->num_simple_entries +
	      gasket_extended_lvl0_page_idx(pg_tbl, dev_addr);

	/* Map one subtable's worth of pages per iteration. */
	while (remain > 0) {
		len = min(remain, GASKET_PAGES_PER_SUBTABLE - slot_idx);

		/* The second-level slots live in the subtable's host page. */
		slot_base =
			(u64 __iomem *)(page_address(pte->page) + pte->offset);
		ret = gasket_perform_mapping(pg_tbl, pte->sublevel + slot_idx,
					     slot_base + slot_idx, host_addr,
					     len, 0);
		if (ret) {
			/* Undo the whole request, not just this subtable. */
			gasket_page_table_unmap_nolock(pg_tbl, dev_addr,
						       num_pages);
			return ret;
		}

		remain -= len;
		/* Every subtable after the first is entered at slot 0. */
		slot_idx = 0;
		pte++;
		host_addr += len * PAGE_SIZE;
	}

	return 0;
}

/*
 * See gasket_page_table.h for general description.
 *
 * gasket_page_table_map calls either gasket_map_simple_pages() or
 * gasket_map_extended_pages() to actually perform the mapping.
 *
 * The page table mutex is held for the entire operation.
*/ int gasket_page_table_map(struct gasket_page_table *pg_tbl, ulong host_addr, ulong dev_addr, uint num_pages) { int ret; if (!num_pages) return 0; mutex_lock(&pg_tbl->mutex); if (gasket_addr_is_simple(pg_tbl, dev_addr)) { ret = gasket_map_simple_pages(pg_tbl, host_addr, dev_addr, num_pages); } else { ret = gasket_map_extended_pages(pg_tbl, host_addr, dev_addr, num_pages); } mutex_unlock(&pg_tbl->mutex); return ret; } EXPORT_SYMBOL(gasket_page_table_map); /* * See gasket_page_table.h for general description. * * gasket_page_table_unmap takes the page table lock and calls either * gasket_unmap_simple_pages() or gasket_unmap_extended_pages() to * actually unmap the pages from device space. * * The page table mutex is held for the entire operation. */ void gasket_page_table_unmap(struct gasket_page_table *pg_tbl, ulong dev_addr, uint num_pages) { if (!num_pages) return; mutex_lock(&pg_tbl->mutex); gasket_page_table_unmap_nolock(pg_tbl, dev_addr, num_pages); mutex_unlock(&pg_tbl->mutex); } EXPORT_SYMBOL(gasket_page_table_unmap); static void gasket_page_table_unmap_all_nolock(struct gasket_page_table *pg_tbl) { gasket_unmap_simple_pages(pg_tbl, gasket_components_to_dev_address(pg_tbl, 1, 0, 0), pg_tbl->num_simple_entries); gasket_unmap_extended_pages(pg_tbl, gasket_components_to_dev_address(pg_tbl, 0, 0, 0), pg_tbl->num_extended_entries * GASKET_PAGES_PER_SUBTABLE); } /* See gasket_page_table.h for description. */ void gasket_page_table_unmap_all(struct gasket_page_table *pg_tbl) { mutex_lock(&pg_tbl->mutex); gasket_page_table_unmap_all_nolock(pg_tbl); mutex_unlock(&pg_tbl->mutex); } EXPORT_SYMBOL(gasket_page_table_unmap_all); /* See gasket_page_table.h for description. */ void gasket_page_table_reset(struct gasket_page_table *pg_tbl) { mutex_lock(&pg_tbl->mutex); gasket_page_table_unmap_all_nolock(pg_tbl); writeq(pg_tbl->config.total_entries, pg_tbl->extended_offset_reg); mutex_unlock(&pg_tbl->mutex); } /* See gasket_page_table.h for description. 
*/ int gasket_page_table_lookup_page(struct gasket_page_table *pg_tbl, ulong dev_addr, struct page **ppage, ulong *poffset) { uint page_num; struct gasket_page_table_entry *pte; mutex_lock(&pg_tbl->mutex); if (gasket_addr_is_simple(pg_tbl, dev_addr)) { page_num = gasket_simple_page_idx(pg_tbl, dev_addr); if (page_num >= pg_tbl->num_simple_entries) goto fail; pte = pg_tbl->entries + page_num; if (pte->status != PTE_INUSE) goto fail; } else { /* Find the level 0 entry, */ page_num = gasket_extended_lvl0_page_idx(pg_tbl, dev_addr); if (page_num >= pg_tbl->num_extended_entries) goto fail; pte = pg_tbl->entries + pg_tbl->num_simple_entries + page_num; if (pte->status != PTE_INUSE) goto fail; /* and its contained level 1 entry. */ page_num = gasket_extended_lvl1_page_idx(pg_tbl, dev_addr); pte = pte->sublevel + page_num; if (pte->status != PTE_INUSE) goto fail; } *ppage = pte->page; *poffset = pte->offset; mutex_unlock(&pg_tbl->mutex); return 0; fail: *ppage = NULL; *poffset = 0; mutex_unlock(&pg_tbl->mutex); return -EINVAL; } /* See gasket_page_table.h for description. */ bool gasket_page_table_are_addrs_bad(struct gasket_page_table *pg_tbl, ulong host_addr, ulong dev_addr, ulong bytes) { if (host_addr & (PAGE_SIZE - 1)) { dev_err(pg_tbl->device, "host mapping address 0x%lx must be page aligned\n", host_addr); return true; } return gasket_page_table_is_dev_addr_bad(pg_tbl, dev_addr, bytes); } EXPORT_SYMBOL(gasket_page_table_are_addrs_bad); /* See gasket_page_table.h for description. 
*/ bool gasket_page_table_is_dev_addr_bad(struct gasket_page_table *pg_tbl, ulong dev_addr, ulong bytes) { uint num_pages = bytes / PAGE_SIZE; if (bytes & (PAGE_SIZE - 1)) { dev_err(pg_tbl->device, "mapping size 0x%lX must be page aligned\n", bytes); return true; } if (num_pages == 0) { dev_err(pg_tbl->device, "requested mapping is less than one page: %lu / %lu\n", bytes, PAGE_SIZE); return true; } if (gasket_addr_is_simple(pg_tbl, dev_addr)) return gasket_is_simple_dev_addr_bad(pg_tbl, dev_addr, num_pages); return gasket_is_extended_dev_addr_bad(pg_tbl, dev_addr, num_pages); } EXPORT_SYMBOL(gasket_page_table_is_dev_addr_bad); /* See gasket_page_table.h for description. */ uint gasket_page_table_max_size(struct gasket_page_table *page_table) { if (!page_table) return 0; return page_table->config.total_entries; } EXPORT_SYMBOL(gasket_page_table_max_size); /* See gasket_page_table.h for description. */ uint gasket_page_table_num_entries(struct gasket_page_table *pg_tbl) { if (!pg_tbl) return 0; return pg_tbl->num_simple_entries + pg_tbl->num_extended_entries; } EXPORT_SYMBOL(gasket_page_table_num_entries); /* See gasket_page_table.h for description. */ uint gasket_page_table_num_simple_entries(struct gasket_page_table *pg_tbl) { if (!pg_tbl) return 0; return pg_tbl->num_simple_entries; } EXPORT_SYMBOL(gasket_page_table_num_simple_entries); /* See gasket_page_table.h for description. */ uint gasket_page_table_num_active_pages(struct gasket_page_table *pg_tbl) { if (!pg_tbl) return 0; return pg_tbl->num_active_pages; } EXPORT_SYMBOL(gasket_page_table_num_active_pages); /* See gasket_page_table.h */ int gasket_page_table_system_status(struct gasket_page_table *page_table) { if (!page_table) return GASKET_STATUS_LAMED; if (gasket_page_table_num_entries(page_table) == 0) { dev_dbg(page_table->device, "Page table size is 0\n"); return GASKET_STATUS_LAMED; } return GASKET_STATUS_ALIVE; } /* Record the host_addr to coherent dma memory mapping. 
*/ int gasket_set_user_virt(struct gasket_dev *gasket_dev, u64 size, dma_addr_t dma_address, ulong vma) { int j; struct gasket_page_table *pg_tbl; unsigned int num_pages = size / PAGE_SIZE; /* * TODO: for future chipset, better handling of the case where multiple * page tables are supported on a given device */ pg_tbl = gasket_dev->page_table[0]; if (!pg_tbl) { dev_dbg(gasket_dev->dev, "%s: invalid page table index\n", __func__); return 0; } for (j = 0; j < num_pages; j++) { pg_tbl->coherent_pages[j].user_virt = (u64)vma + j * PAGE_SIZE; } return 0; } /* Allocate a block of coherent memory. */ int gasket_alloc_coherent_memory(struct gasket_dev *gasket_dev, u64 size, dma_addr_t *dma_address, u64 index) { dma_addr_t handle; void *mem; int j; unsigned int num_pages = DIV_ROUND_UP(size, PAGE_SIZE); const struct gasket_driver_desc *driver_desc = gasket_get_driver_desc(gasket_dev); if (!gasket_dev->page_table[index]) return -EFAULT; if (num_pages == 0) return -EINVAL; mem = dma_alloc_coherent(gasket_get_device(gasket_dev), num_pages * PAGE_SIZE, &handle, GFP_KERNEL); if (!mem) goto nomem; gasket_dev->page_table[index]->num_coherent_pages = num_pages; /* allocate the physical memory block */ gasket_dev->page_table[index]->coherent_pages = kcalloc(num_pages, sizeof(*gasket_dev->page_table[index]->coherent_pages), GFP_KERNEL); if (!gasket_dev->page_table[index]->coherent_pages) goto nomem; gasket_dev->coherent_buffer.length_bytes = PAGE_SIZE * (num_pages); gasket_dev->coherent_buffer.phys_base = handle; gasket_dev->coherent_buffer.virt_base = mem; *dma_address = driver_desc->coherent_buffer_description.base; for (j = 0; j < num_pages; j++) { gasket_dev->page_table[index]->coherent_pages[j].paddr = handle + j * PAGE_SIZE; gasket_dev->page_table[index]->coherent_pages[j].kernel_virt = (u64)mem + j * PAGE_SIZE; } return 0; nomem: if (mem) { dma_free_coherent(gasket_get_device(gasket_dev), num_pages * PAGE_SIZE, mem, handle); gasket_dev->coherent_buffer.length_bytes = 0; 
gasket_dev->coherent_buffer.virt_base = NULL; gasket_dev->coherent_buffer.phys_base = 0; } kfree(gasket_dev->page_table[index]->coherent_pages); gasket_dev->page_table[index]->coherent_pages = NULL; gasket_dev->page_table[index]->num_coherent_pages = 0; return -ENOMEM; } /* Free a block of coherent memory. */ int gasket_free_coherent_memory(struct gasket_dev *gasket_dev, u64 size, dma_addr_t dma_address, u64 index) { const struct gasket_driver_desc *driver_desc; if (!gasket_dev->page_table[index]) return -EFAULT; driver_desc = gasket_get_driver_desc(gasket_dev); if (driver_desc->coherent_buffer_description.base != dma_address) return -EADDRNOTAVAIL; if (gasket_dev->coherent_buffer.length_bytes) { dma_free_coherent(gasket_get_device(gasket_dev), gasket_dev->coherent_buffer.length_bytes, gasket_dev->coherent_buffer.virt_base, gasket_dev->coherent_buffer.phys_base); gasket_dev->coherent_buffer.length_bytes = 0; gasket_dev->coherent_buffer.virt_base = NULL; gasket_dev->coherent_buffer.phys_base = 0; } kfree(gasket_dev->page_table[index]->coherent_pages); gasket_dev->page_table[index]->coherent_pages = NULL; gasket_dev->page_table[index]->num_coherent_pages = 0; return 0; } /* Release all coherent memory. */ void gasket_free_coherent_memory_all(struct gasket_dev *gasket_dev, u64 index) { if (!gasket_dev->page_table[index]) return; if (gasket_dev->coherent_buffer.length_bytes) { dma_free_coherent(gasket_get_device(gasket_dev), gasket_dev->coherent_buffer.length_bytes, gasket_dev->coherent_buffer.virt_base, gasket_dev->coherent_buffer.phys_base); gasket_dev->coherent_buffer.length_bytes = 0; gasket_dev->coherent_buffer.virt_base = NULL; gasket_dev->coherent_buffer.phys_base = 0; } }