Diffstat (limited to 'drivers/gpu/drm/nouveau/nouveau_dmem.c')
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_dmem.c | 962
1 file changed, 399 insertions, 563 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 1333220787a1..7ba66ad68a8a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -25,14 +25,22 @@ #include "nouveau_dma.h" #include "nouveau_mem.h" #include "nouveau_bo.h" +#include "nouveau_svm.h" #include <nvif/class.h> #include <nvif/object.h> +#include <nvif/push906f.h> +#include <nvif/if000c.h> #include <nvif/if500b.h> #include <nvif/if900b.h> +#include <nvif/if000c.h> + +#include <nvhw/class/cla0b5.h> #include <linux/sched/mm.h> #include <linux/hmm.h> +#include <linux/memremap.h> +#include <linux/migrate.h> /* * FIXME: this is ugly right now we are using TTM to allocate vram and we pin @@ -44,8 +52,6 @@ #define DMEM_CHUNK_SIZE (2UL << 20) #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) -struct nouveau_migrate; - enum nouveau_aper { NOUVEAU_APER_VIRT, NOUVEAU_APER_VRAM, @@ -55,235 +61,163 @@ enum nouveau_aper { typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages, enum nouveau_aper, u64 dst_addr, enum nouveau_aper, u64 src_addr); +typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length, + enum nouveau_aper, u64 dst_addr); struct nouveau_dmem_chunk { struct list_head list; struct nouveau_bo *bo; struct nouveau_drm *drm; - unsigned long pfn_first; unsigned long callocated; - unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)]; - spinlock_t lock; + struct dev_pagemap pagemap; }; struct nouveau_dmem_migrate { nouveau_migrate_copy_t copy_func; + nouveau_clear_page_t clear_func; struct nouveau_channel *chan; }; struct nouveau_dmem { struct nouveau_drm *drm; - struct dev_pagemap pagemap; struct nouveau_dmem_migrate migrate; - struct list_head chunk_free; - struct list_head chunk_full; - struct list_head chunk_empty; + struct list_head chunks; struct mutex mutex; + struct page *free_pages; + spinlock_t lock; }; -static inline struct nouveau_dmem *page_to_dmem(struct page *page) +static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page) { - return container_of(page->pgmap, struct nouveau_dmem, pagemap); + return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap); } -struct nouveau_dmem_fault { - struct nouveau_drm *drm; - struct nouveau_fence *fence; - dma_addr_t *dma; - unsigned long npages; -}; +static struct nouveau_drm *page_to_drm(struct page *page) +{ + struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); -struct nouveau_migrate { - struct vm_area_struct *vma; - struct nouveau_drm *drm; - struct nouveau_fence *fence; - unsigned long npages; - dma_addr_t *dma; - unsigned long dma_nr; -}; + return chunk->drm; +} + +unsigned long nouveau_dmem_page_addr(struct page *page) +{ + struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); + unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) - + chunk->pagemap.range.start; + + return chunk->bo->offset + off; +} static void nouveau_dmem_page_free(struct page *page) { - struct nouveau_dmem_chunk *chunk = page->zone_device_data; - unsigned long idx = page_to_pfn(page) - chunk->pfn_first; + struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); + struct nouveau_dmem *dmem = chunk->drm->dmem; + + spin_lock(&dmem->lock); + page->zone_device_data = dmem->free_pages; + dmem->free_pages = page; - /* - * FIXME: - * - * This is really a bad example, we need to overhaul nouveau memory - * management to be more page focus and allow lighter locking scheme - * to be use in the process. 
- */ - spin_lock(&chunk->lock); - clear_bit(idx, chunk->bitmap); WARN_ON(!chunk->callocated); chunk->callocated--; /* * FIXME when chunk->callocated reach 0 we should add the chunk to * a reclaim list so that it can be freed in case of memory pressure. */ - spin_unlock(&chunk->lock); + spin_unlock(&dmem->lock); } -static void -nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { - struct nouveau_dmem_fault *fault = private; - struct nouveau_drm *drm = fault->drm; - struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr); - if (!dpage) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - continue; - } - lock_page(dpage); - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED; - npages++; - } - - /* Allocate storage for DMA addresses, so we can unmap later. */ - fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL); - if (!fault->dma) - goto error; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; - struct page *spage, *dpage; - u64 src_addr, dst_addr; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - - fault->dma[fault->npages] = - dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, fault->dma[fault->npages])) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - - dst_addr = fault->dma[fault->npages++]; - - chunk = spage->zone_device_data; - src_addr = page_to_pfn(spage) - chunk->pfn_first; - src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - - ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr, - NOUVEAU_APER_VRAM, src_addr); - if (ret) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } + if (fence) { + nouveau_fence_wait(*fence, true, false); + nouveau_fence_unref(fence); + } else { + /* + * FIXME wait for channel to be IDLE before calling finalizing + * the hmem object. 
+ */ } +} - nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence); - - return; - -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; +static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm, + struct vm_fault *vmf, struct migrate_vma *args, + dma_addr_t *dma_addr) +{ + struct device *dev = drm->dev->dev; + struct page *dpage, *spage; + struct nouveau_svmm *svmm; - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; + spage = migrate_pfn_to_page(args->src[0]); + if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE)) + return 0; - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; + dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address); + if (!dpage) + return VM_FAULT_SIGBUS; + lock_page(dpage); - __free_page(page); - } -} + *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto error_free_page; -void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) -{ - struct nouveau_dmem_fault *fault = private; - struct nouveau_drm *drm = fault->drm; + svmm = spage->zone_device_data; + mutex_lock(&svmm->mutex); + nouveau_svmm_invalidate(svmm, args->start, args->end); + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr, + NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) + goto error_dma_unmap; + mutex_unlock(&svmm->mutex); - if (fault->fence) { - nouveau_fence_wait(fault->fence, true, false); - nouveau_fence_unref(&fault->fence); - } else { - /* - * FIXME wait for channel to be IDLE before calling finalizing - * the hmem object below (nouveau_migrate_hmem_fini()). - */ - } + args->dst[0] = migrate_pfn(page_to_pfn(dpage)); + return 0; - while (fault->npages--) { - dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - } - kfree(fault->dma); +error_dma_unmap: + mutex_unlock(&svmm->mutex); + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +error_free_page: + __free_page(dpage); + return VM_FAULT_SIGBUS; } -static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { - .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy, - .finalize_and_map = nouveau_dmem_fault_finalize_and_map, -}; - static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) { - struct nouveau_dmem *dmem = page_to_dmem(vmf->page); - unsigned long src[1] = {0}, dst[1] = {0}; - struct nouveau_dmem_fault fault = { .drm = dmem->drm }; - int ret; + struct nouveau_drm *drm = page_to_drm(vmf->page); + struct nouveau_dmem *dmem = drm->dmem; + struct nouveau_fence *fence; + unsigned long src = 0, dst = 0; + dma_addr_t dma_addr = 0; + vm_fault_t ret; + struct migrate_vma args = { + .vma = vmf->vma, + .start = vmf->address, + .end = vmf->address + PAGE_SIZE, + .src = &src, + .dst = &dst, + .pgmap_owner = drm->dev, + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE, + }; /* * FIXME what we really want is to find some heuristic to migrate more * than just one page on CPU fault. When such fault happens it is very * likely that more surrounding page will CPU fault too. 
*/ - ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma, - vmf->address, vmf->address + PAGE_SIZE, - src, dst, &fault); - if (ret) + if (migrate_vma_setup(&args) < 0) return VM_FAULT_SIGBUS; + if (!args.cpages) + return 0; - if (dst[0] == MIGRATE_PFN_ERROR) - return VM_FAULT_SIGBUS; + ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr); + if (ret || dst == 0) + goto done; - return 0; + nouveau_fence_new(dmem->migrate.chan, false, &fence); + migrate_vma_pages(&args); + nouveau_dmem_fence_done(&fence); + dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +done: + migrate_vma_finalize(&args); + return ret; } static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { @@ -292,132 +226,106 @@ static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { }; static int -nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) +nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) { struct nouveau_dmem_chunk *chunk; + struct resource *res; + struct page *page; + void *ptr; + unsigned long i, pfn_first; int ret; - if (drm->dmem == NULL) - return -EINVAL; - - mutex_lock(&drm->dmem->mutex); - chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, - struct nouveau_dmem_chunk, - list); + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); if (chunk == NULL) { - mutex_unlock(&drm->dmem->mutex); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - list_del(&chunk->list); - mutex_unlock(&drm->dmem->mutex); + /* Allocate unused physical address space for device private pages. */ + res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE, + "nouveau_dmem"); + if (IS_ERR(res)) { + ret = PTR_ERR(res); + goto out_free; + } + + chunk->drm = drm; + chunk->pagemap.type = MEMORY_DEVICE_PRIVATE; + chunk->pagemap.range.start = res->start; + chunk->pagemap.range.end = res->end; + chunk->pagemap.nr_range = 1; + chunk->pagemap.ops = &nouveau_dmem_pagemap_ops; + chunk->pagemap.owner = drm->dev; ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0, - TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL, + NOUVEAU_GEM_DOMAIN_VRAM, 0, 0, NULL, NULL, &chunk->bo); if (ret) - goto out; + goto out_release; - ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); - if (ret) { - nouveau_bo_ref(NULL, &chunk->bo); - goto out; - } + ret = nouveau_bo_pin(chunk->bo, NOUVEAU_GEM_DOMAIN_VRAM, false); + if (ret) + goto out_bo_free; - bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES); - spin_lock_init(&chunk->lock); + ptr = memremap_pages(&chunk->pagemap, numa_node_id()); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto out_bo_unpin; + } -out: mutex_lock(&drm->dmem->mutex); - if (chunk->bo) - list_add(&chunk->list, &drm->dmem->chunk_empty); - else - list_add_tail(&chunk->list, &drm->dmem->chunk_empty); + list_add(&chunk->list, &drm->dmem->chunks); mutex_unlock(&drm->dmem->mutex); - return ret; -} - -static struct nouveau_dmem_chunk * -nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm) -{ - struct nouveau_dmem_chunk *chunk; - - chunk = list_first_entry_or_null(&drm->dmem->chunk_free, - struct nouveau_dmem_chunk, - list); - if (chunk) - return chunk; - - chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, - struct nouveau_dmem_chunk, - list); - if (chunk->bo) - return chunk; - - return NULL; -} - -static int -nouveau_dmem_pages_alloc(struct nouveau_drm *drm, - unsigned long npages, - unsigned long *pages) -{ - struct nouveau_dmem_chunk *chunk; - unsigned long c; - int ret; - - memset(pages, 0xff, npages * sizeof(*pages)); - - mutex_lock(&drm->dmem->mutex); - for (c = 0; c < npages;) 
{ - unsigned long i; - - chunk = nouveau_dmem_chunk_first_free_locked(drm); - if (chunk == NULL) { - mutex_unlock(&drm->dmem->mutex); - ret = nouveau_dmem_chunk_alloc(drm); - if (ret) { - if (c) - return 0; - return ret; - } - mutex_lock(&drm->dmem->mutex); - continue; - } - - spin_lock(&chunk->lock); - i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES); - while (i < DMEM_CHUNK_NPAGES && c < npages) { - pages[c] = chunk->pfn_first + i; - set_bit(i, chunk->bitmap); - chunk->callocated++; - c++; - - i = find_next_zero_bit(chunk->bitmap, - DMEM_CHUNK_NPAGES, i); - } - spin_unlock(&chunk->lock); + pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT; + page = pfn_to_page(pfn_first); + spin_lock(&drm->dmem->lock); + for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) { + page->zone_device_data = drm->dmem->free_pages; + drm->dmem->free_pages = page; } - mutex_unlock(&drm->dmem->mutex); + *ppage = page; + chunk->callocated++; + spin_unlock(&drm->dmem->lock); + + NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", + DMEM_CHUNK_SIZE >> 20); return 0; + +out_bo_unpin: + nouveau_bo_unpin(chunk->bo); +out_bo_free: + nouveau_bo_ref(NULL, &chunk->bo); +out_release: + release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range)); +out_free: + kfree(chunk); +out: + return ret; } static struct page * nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) { - unsigned long pfns[1]; - struct page *page; + struct nouveau_dmem_chunk *chunk; + struct page *page = NULL; int ret; - /* FIXME stop all the miss-match API ... */ - ret = nouveau_dmem_pages_alloc(drm, 1, pfns); - if (ret) - return NULL; + spin_lock(&drm->dmem->lock); + if (drm->dmem->free_pages) { + page = drm->dmem->free_pages; + drm->dmem->free_pages = page->zone_device_data; + chunk = nouveau_page_to_chunk(page); + chunk->callocated++; + spin_unlock(&drm->dmem->lock); + } else { + spin_unlock(&drm->dmem->lock); + ret = nouveau_dmem_chunk_alloc(drm, &page); + if (ret) + return NULL; + } - page = pfn_to_page(pfns[0]); - get_page(page); lock_page(page); return page; } @@ -439,13 +347,8 @@ nouveau_dmem_resume(struct nouveau_drm *drm) return; mutex_lock(&drm->dmem->mutex); - list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { - ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); - /* FIXME handle pin failure */ - WARN_ON(ret); - } - list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { - ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + list_for_each_entry(chunk, &drm->dmem->chunks, list) { + ret = nouveau_bo_pin(chunk->bo, NOUVEAU_GEM_DOMAIN_VRAM, false); /* FIXME handle pin failure */ WARN_ON(ret); } @@ -461,12 +364,8 @@ nouveau_dmem_suspend(struct nouveau_drm *drm) return; mutex_lock(&drm->dmem->mutex); - list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { + list_for_each_entry(chunk, &drm->dmem->chunks, list) nouveau_bo_unpin(chunk->bo); - } - list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { - nouveau_bo_unpin(chunk->bo); - } mutex_unlock(&drm->dmem->mutex); } @@ -480,15 +379,13 @@ nouveau_dmem_fini(struct nouveau_drm *drm) mutex_lock(&drm->dmem->mutex); - WARN_ON(!list_empty(&drm->dmem->chunk_free)); - WARN_ON(!list_empty(&drm->dmem->chunk_full)); - - list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) { - if (chunk->bo) { - nouveau_bo_unpin(chunk->bo); - nouveau_bo_ref(NULL, &chunk->bo); - } + list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) { + nouveau_bo_unpin(chunk->bo); + nouveau_bo_ref(NULL, &chunk->bo); list_del(&chunk->list); + 
memunmap_pages(&chunk->pagemap); + release_mem_region(chunk->pagemap.range.start, + range_len(&chunk->pagemap.range)); kfree(chunk); } @@ -500,57 +397,128 @@ nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, enum nouveau_aper dst_aper, u64 dst_addr, enum nouveau_aper src_aper, u64 src_addr) { - struct nouveau_channel *chan = drm->dmem->migrate.chan; - u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ | - (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ | - (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ | - (1 << 2) /* FLUSH_ENABLE_TRUE. */ | - (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */; + struct nvif_push *push = drm->dmem->migrate.chan->chan.push; + u32 launch_dma = 0; int ret; - ret = RING_SPACE(chan, 13); + ret = PUSH_WAIT(push, 13); if (ret) return ret; if (src_aper != NOUVEAU_APER_VIRT) { switch (src_aper) { case NOUVEAU_APER_VRAM: - BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0); + PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE, + NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB)); break; case NOUVEAU_APER_HOST: - BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1); + PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE, + NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM)); break; default: return -EINVAL; } - launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */ + + launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL); } if (dst_aper != NOUVEAU_APER_VIRT) { switch (dst_aper) { case NOUVEAU_APER_VRAM: - BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0); + PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, + NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB)); break; case NOUVEAU_APER_HOST: - BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1); + PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, + NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM)); break; default: return -EINVAL; } - launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. 
*/ + + launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL); + } + + PUSH_MTHD(push, NVA0B5, OFFSET_IN_UPPER, + NVVAL(NVA0B5, OFFSET_IN_UPPER, UPPER, upper_32_bits(src_addr)), + + OFFSET_IN_LOWER, lower_32_bits(src_addr), + + OFFSET_OUT_UPPER, + NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)), + + OFFSET_OUT_LOWER, lower_32_bits(dst_addr), + PITCH_IN, PAGE_SIZE, + PITCH_OUT, PAGE_SIZE, + LINE_LENGTH_IN, PAGE_SIZE, + LINE_COUNT, npages); + + PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma | + NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) | + NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) | + NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) | + NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) | + NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) | + NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) | + NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, TRUE) | + NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) | + NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING)); + return 0; +} + +static int +nvc0b5_migrate_clear(struct nouveau_drm *drm, u32 length, + enum nouveau_aper dst_aper, u64 dst_addr) +{ + struct nvif_push *push = drm->dmem->migrate.chan->chan.push; + u32 launch_dma = 0; + int ret; + + ret = PUSH_WAIT(push, 12); + if (ret) + return ret; + + switch (dst_aper) { + case NOUVEAU_APER_VRAM: + PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, + NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB)); + break; + case NOUVEAU_APER_HOST: + PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, + NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM)); + break; + default: + return -EINVAL; } - BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8); - OUT_RING (chan, upper_32_bits(src_addr)); - OUT_RING (chan, lower_32_bits(src_addr)); - OUT_RING (chan, upper_32_bits(dst_addr)); - OUT_RING (chan, lower_32_bits(dst_addr)); - OUT_RING (chan, PAGE_SIZE); - OUT_RING (chan, PAGE_SIZE); - OUT_RING (chan, PAGE_SIZE); - OUT_RING (chan, npages); - BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1); - OUT_RING (chan, launch_dma); + launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL); + + PUSH_MTHD(push, NVA0B5, SET_REMAP_CONST_A, 0, + SET_REMAP_CONST_B, 0, + + SET_REMAP_COMPONENTS, + NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) | + NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) | + NVDEF(NVA0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) | + NVDEF(NVA0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO)); + + PUSH_MTHD(push, NVA0B5, OFFSET_OUT_UPPER, + NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)), + + OFFSET_OUT_LOWER, lower_32_bits(dst_addr)); + + PUSH_MTHD(push, NVA0B5, LINE_LENGTH_IN, length >> 3); + + PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma | + NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) | + NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) | + NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) | + NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) | + NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) | + NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) | + NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) | + NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, TRUE) | + NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING)); return 0; } @@ -563,6 +531,7 @@ nouveau_dmem_migrate_init(struct nouveau_drm *drm) case VOLTA_DMA_COPY_A: case TURING_DMA_COPY_A: drm->dmem->migrate.copy_func = nvc0b5_migrate_copy; + drm->dmem->migrate.clear_func = nvc0b5_migrate_clear; drm->dmem->migrate.chan = drm->ttm.chan; return 0; default: @@ -574,9 +543,6 @@ 
nouveau_dmem_migrate_init(struct nouveau_drm *drm) void nouveau_dmem_init(struct nouveau_drm *drm) { - struct device *device = drm->dev->dev; - struct resource *res; - unsigned long i, size, pfn_first; int ret; /* This only make sense on PASCAL or newer */ @@ -588,282 +554,152 @@ nouveau_dmem_init(struct nouveau_drm *drm) drm->dmem->drm = drm; mutex_init(&drm->dmem->mutex); - INIT_LIST_HEAD(&drm->dmem->chunk_free); - INIT_LIST_HEAD(&drm->dmem->chunk_full); - INIT_LIST_HEAD(&drm->dmem->chunk_empty); - - size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE); + INIT_LIST_HEAD(&drm->dmem->chunks); + mutex_init(&drm->dmem->mutex); + spin_lock_init(&drm->dmem->lock); /* Initialize migration dma helpers before registering memory */ ret = nouveau_dmem_migrate_init(drm); - if (ret) - goto out_free; - - /* - * FIXME we need some kind of policy to decide how much VRAM we - * want to register with HMM. For now just register everything - * and latter if we want to do thing like over commit then we - * could revisit this. - */ - res = devm_request_free_mem_region(device, &iomem_resource, size); - if (IS_ERR(res)) - goto out_free; - drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - drm->dmem->pagemap.res = *res; - drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops; - if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap))) - goto out_free; - - pfn_first = res->start >> PAGE_SHIFT; - for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) { - struct nouveau_dmem_chunk *chunk; - struct page *page; - unsigned long j; - - chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); - if (chunk == NULL) { - nouveau_dmem_fini(drm); - return; - } - - chunk->drm = drm; - chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES); - list_add_tail(&chunk->list, &drm->dmem->chunk_empty); - - page = pfn_to_page(chunk->pfn_first); - for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) - page->zone_device_data = chunk; + if (ret) { + kfree(drm->dmem); + drm->dmem = NULL; } - - NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20); - return; -out_free: - kfree(drm->dmem); - drm->dmem = NULL; } -static void -nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, + struct nouveau_svmm *svmm, unsigned long src, + dma_addr_t *dma_addr, u64 *pfn) { - struct nouveau_migrate *migrate = private; - struct nouveau_drm *drm = migrate->drm; struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = nouveau_dmem_page_alloc_locked(drm); - if (!dpage) - continue; - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED | - MIGRATE_PFN_DEVICE; - npages++; - } - - if (!npages) - return; + struct page *dpage, *spage; + unsigned long paddr; - /* Allocate storage for DMA addresses, so we can unmap later. 
*/ - migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); - if (!migrate->dma) - goto error; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; - struct page *spage, *dpage; - u64 src_addr, dst_addr; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - chunk = dpage->zone_device_data; - dst_addr = page_to_pfn(dpage) - chunk->pfn_first; - dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - - migrate->dma[migrate->dma_nr] = - dma_map_page_attrs(dev, spage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } + spage = migrate_pfn_to_page(src); + if (!(src & MIGRATE_PFN_MIGRATE)) + goto out; - src_addr = migrate->dma[migrate->dma_nr++]; + dpage = nouveau_dmem_page_alloc_locked(drm); + if (!dpage) + goto out; - ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, - NOUVEAU_APER_HOST, src_addr); - if (ret) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } + paddr = nouveau_dmem_page_addr(dpage); + if (spage) { + *dma_addr = dma_map_page(dev, spage, 0, page_size(spage), + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto out_free_page; + if (drm->dmem->migrate.copy_func(drm, 1, + NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, *dma_addr)) + goto out_dma_unmap; + } else { + *dma_addr = DMA_MAPPING_ERROR; + if (drm->dmem->migrate.clear_func(drm, page_size(dpage), + NOUVEAU_APER_VRAM, paddr)) + goto out_free_page; } - nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence); - - return; - -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; - - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; - - __free_page(page); - } + dpage->zone_device_data = svmm; + *pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM | + ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT); + if (src & MIGRATE_PFN_WRITE) + *pfn |= NVIF_VMM_PFNMAP_V0_W; + return migrate_pfn(page_to_pfn(dpage)); + +out_dma_unmap: + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +out_free_page: + nouveau_dmem_page_free_locked(drm, dpage); +out: + *pfn = NVIF_VMM_PFNMAP_V0_NONE; + return 0; } -void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, + struct nouveau_svmm *svmm, struct migrate_vma *args, + dma_addr_t *dma_addrs, u64 *pfns) { - struct nouveau_migrate *migrate = private; - struct nouveau_drm *drm = migrate->drm; - - if (migrate->fence) { - nouveau_fence_wait(migrate->fence, true, false); - nouveau_fence_unref(&migrate->fence); - } else { - /* - * FIXME wait for channel to be IDLE before finalizing - * the hmem object below (nouveau_migrate_hmem_fini()) ? 
- */ + struct nouveau_fence *fence; + unsigned long addr = args->start, nr_dma = 0, i; + + for (i = 0; addr < args->end; i++) { + args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm, + args->src[i], dma_addrs + nr_dma, pfns + i); + if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma])) + nr_dma++; + addr += PAGE_SIZE; } - while (migrate->dma_nr--) { - dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - } - kfree(migrate->dma); + nouveau_fence_new(drm->dmem->migrate.chan, false, &fence); + migrate_vma_pages(args); + nouveau_dmem_fence_done(&fence); + nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i); - /* - * FIXME optimization: update GPU page table to point to newly - * migrated memory. - */ + while (nr_dma--) { + dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + migrate_vma_finalize(args); } -static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { - .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, - .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, -}; - int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + struct nouveau_svmm *svmm, struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long *src_pfns, *dst_pfns, npages; - struct nouveau_migrate migrate = {0}; - unsigned long i, c, max; - int ret = 0; - - npages = (end - start) >> PAGE_SHIFT; - max = min(SG_MAX_SINGLE_ALLOC, npages); - src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (src_pfns == NULL) - return -ENOMEM; - dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (dst_pfns == NULL) { - kfree(src_pfns); - return -ENOMEM; - } - - migrate.drm = drm; - migrate.vma = vma; - migrate.npages = npages; - for (i = 0; i < npages; i += c) { - unsigned long next; - - c = min(SG_MAX_SINGLE_ALLOC, npages); - next = start + (c << PAGE_SHIFT); - ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, - next, src_pfns, dst_pfns, &migrate); - if (ret) - goto out; - start = next; - } - -out: - kfree(dst_pfns); - kfree(src_pfns); - return ret; -} - -static inline bool -nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) -{ - return is_device_private_page(page) && drm->dmem == page_to_dmem(page); -} + unsigned long npages = (end - start) >> PAGE_SHIFT; + unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); + dma_addr_t *dma_addrs; + struct migrate_vma args = { + .vma = vma, + .start = start, + .pgmap_owner = drm->dev, + .flags = MIGRATE_VMA_SELECT_SYSTEM, + }; + unsigned long i; + u64 *pfns; + int ret = -ENOMEM; -void -nouveau_dmem_convert_pfn(struct nouveau_drm *drm, - struct hmm_range *range) -{ - unsigned long i, npages; - - npages = (range->end - range->start) >> PAGE_SHIFT; - for (i = 0; i < npages; ++i) { - struct nouveau_dmem_chunk *chunk; - struct page *page; - uint64_t addr; + if (drm->dmem == NULL) + return -ENODEV; - page = hmm_pfn_to_page(range, range->pfns[i]); - if (page == NULL) - continue; + args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL); + if (!args.src) + goto out; + args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL); + if (!args.dst) + goto out_free_src; - if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { - continue; - } + dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL); + if (!dma_addrs) + goto out_free_dst; - if (!nouveau_dmem_page(drm, page)) { - WARN(1, "Some unknown device memory !\n"); - range->pfns[i] = 0; - continue; - } + pfns = nouveau_pfns_alloc(max); + if (!pfns) + goto out_free_dma; - chunk = 
page->zone_device_data; - addr = page_to_pfn(page) - chunk->pfn_first; - addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; + for (i = 0; i < npages; i += max) { + args.end = start + (max << PAGE_SHIFT); + ret = migrate_vma_setup(&args); + if (ret) + goto out_free_pfns; - range->pfns[i] &= ((1UL << range->pfn_shift) - 1); - range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; + if (args.cpages) + nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_addrs, + pfns); + args.start = args.end; } + + ret = 0; +out_free_pfns: + nouveau_pfns_free(pfns); +out_free_dma: + kfree(dma_addrs); +out_free_dst: + kfree(args.dst); +out_free_src: + kfree(args.src); +out: + return ret; } |
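The patch above replaces the old migrate_vma_ops alloc_and_copy/finalize_and_map callbacks with the three-phase migrate_vma API, and gives each 2MB VRAM chunk its own dev_pagemap registered through memremap_pages(). The two sketches below illustrate those patterns in isolation; they are minimal, hedged examples rather than part of the patch. Names such as my_chunk_register(), MY_CHUNK_SIZE, struct my_dev and my_dev_copy_to_ram() are hypothetical stand-ins for the driver specifics (nouveau uses nouveau_dmem_chunk_alloc(), drm->dmem->migrate.copy_func and drm->dev as the pgmap owner, and also DMA-maps the system page and fences the copy, which is omitted here).

First, registering one chunk of device-private memory, following the pattern of nouveau_dmem_chunk_alloc():

/*
 * Sketch: expose one VRAM-backed chunk as MEMORY_DEVICE_PRIVATE.
 * MY_CHUNK_SIZE, my_chunk_register() and the "my_dmem" region name
 * are illustrative only.
 */
#include <linux/memremap.h>
#include <linux/ioport.h>
#include <linux/mm.h>

#define MY_CHUNK_SIZE	(2UL << 20)	/* same 2MB granularity as DMEM_CHUNK_SIZE */

static int my_chunk_register(struct dev_pagemap *pagemap,
			     const struct dev_pagemap_ops *ops, void *owner)
{
	struct resource *res;
	void *ptr;

	/* Reserve unused physical address space to back the device pages. */
	res = request_free_mem_region(&iomem_resource, MY_CHUNK_SIZE, "my_dmem");
	if (IS_ERR(res))
		return PTR_ERR(res);

	pagemap->type = MEMORY_DEVICE_PRIVATE;
	pagemap->range.start = res->start;
	pagemap->range.end = res->end;
	pagemap->nr_range = 1;
	pagemap->ops = ops;	/* must supply .page_free and .migrate_to_ram */
	pagemap->owner = owner;	/* later matched against migrate_vma.pgmap_owner */

	/* Create struct pages for the reserved range. */
	ptr = memremap_pages(pagemap, numa_node_id());
	if (IS_ERR(ptr)) {
		release_mem_region(res->start, resource_size(res));
		return PTR_ERR(ptr);
	}
	return 0;
}

Second, the CPU-fault path that migrates one device-private page back to system RAM, mirroring nouveau_dmem_migrate_to_ram() and nouveau_dmem_fault_copy_one():

/*
 * Sketch: CPU fault on a device-private page, handled with the
 * three-phase migrate_vma API.  struct my_dev and my_dev_copy_to_ram()
 * are hypothetical driver hooks standing in for the nouveau copy engine.
 */
#include <linux/migrate.h>
#include <linux/mm.h>

struct my_dev {
	void *owner;	/* the value passed as dev_pagemap.owner above */
};

int my_dev_copy_to_ram(struct my_dev *dev, struct page *dst, struct page *src);

static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf, struct my_dev *dev)
{
	unsigned long src = 0, dst = 0;
	struct page *spage, *dpage;
	vm_fault_t ret = 0;
	struct migrate_vma args = {
		.vma		= vmf->vma,
		.start		= vmf->address,
		.end		= vmf->address + PAGE_SIZE,
		.src		= &src,
		.dst		= &dst,
		.pgmap_owner	= dev->owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
	};

	/* Phase 1: collect and isolate the faulting device-private page. */
	if (migrate_vma_setup(&args) < 0)
		return VM_FAULT_SIGBUS;
	if (!args.cpages)
		return 0;	/* nothing collected; the fault will be retried */

	spage = migrate_pfn_to_page(src);
	if (spage && (src & MIGRATE_PFN_MIGRATE)) {
		dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
		if (!dpage) {
			ret = VM_FAULT_SIGBUS;
			goto finalize;
		}
		lock_page(dpage);
		if (my_dev_copy_to_ram(dev, dpage, spage)) {	/* hypothetical hook */
			unlock_page(dpage);
			__free_page(dpage);
			ret = VM_FAULT_SIGBUS;
			goto finalize;
		}
		dst = migrate_pfn(page_to_pfn(dpage));
		/* Phase 2: install the new system-RAM page into the page tables. */
		migrate_vma_pages(&args);
	}
finalize:
	/* Phase 3: unlock pages, drop references, restore entries not migrated. */
	migrate_vma_finalize(&args);
	return ret;
}

Keeping migrate_vma_setup(), migrate_vma_pages() and migrate_vma_finalize() strictly paired is what lets the core mm unlock and restore any entries that were collected but never actually copied, which is why the error paths in the patch fall through to migrate_vma_finalize() rather than returning early.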