From 9da3da660d8c19a54f6e93361d147509be3fff84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jun 2012 15:20:22 +0100 Subject: drm/i915: Replace the array of pages with a scatterlist Rather than have multiple data structures for describing our page layout in conjunction with the array of pages, we can migrate all users over to a scatterlist. One major advantage, other than unifying the page tracking structures, this offers is that we replace the vmalloc'ed array (which can be up to a megabyte in size) with a chain of individual pages which helps reduce memory pressure. The disadvantage is that we then do not have a simple array to iterate, or to access randomly. The common case for this is in the relocation processing, which will typically fit within a single scatterlist page and so be almost the same cost as the simple array. For iterating over the array, the extra function call could be optimised away, but in reality is an insignificant cost of either binding the pages, or performing the pwrite/pread. v2: Fix drm_clflush_sg() to not invoke wbinvd as well! And fix the trivial compile error from rebasing. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/char/agp/intel-gtt.c | 51 +++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) (limited to 'drivers/char/agp') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 58e32f7c3229..7fa655ac24d8 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -84,40 +84,33 @@ static struct _intel_private { #define IS_IRONLAKE intel_private.driver->is_ironlake #define HAS_PGTBL_EN intel_private.driver->has_pgtbl_enable -int intel_gtt_map_memory(struct page **pages, unsigned int num_entries, - struct scatterlist **sg_list, int *num_sg) +static int intel_gtt_map_memory(struct page **pages, + unsigned int num_entries, + struct sg_table *st) { - struct sg_table st; struct scatterlist *sg; int i; - if (*sg_list) - return 0; /* already mapped (for e.g. resume */ - DBG("try mapping %lu pages\n", (unsigned long)num_entries); - if (sg_alloc_table(&st, num_entries, GFP_KERNEL)) + if (sg_alloc_table(st, num_entries, GFP_KERNEL)) goto err; - *sg_list = sg = st.sgl; - - for (i = 0 ; i < num_entries; i++, sg = sg_next(sg)) + for_each_sg(st->sgl, sg, num_entries, i) sg_set_page(sg, pages[i], PAGE_SIZE, 0); - *num_sg = pci_map_sg(intel_private.pcidev, *sg_list, - num_entries, PCI_DMA_BIDIRECTIONAL); - if (unlikely(!*num_sg)) + if (!pci_map_sg(intel_private.pcidev, + st->sgl, st->nents, PCI_DMA_BIDIRECTIONAL)) goto err; return 0; err: - sg_free_table(&st); + sg_free_table(st); return -ENOMEM; } -EXPORT_SYMBOL(intel_gtt_map_memory); -void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg) +static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg) { struct sg_table st; DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count); @@ -130,7 +123,6 @@ void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg) sg_free_table(&st); } -EXPORT_SYMBOL(intel_gtt_unmap_memory); static void intel_fake_agp_enable(struct agp_bridge_data *bridge, u32 mode) { @@ -879,8 +871,7 @@ static bool i830_check_flags(unsigned int flags) return false; } -void intel_gtt_insert_sg_entries(struct scatterlist *sg_list, - unsigned int sg_len, +void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) { @@ -892,12 +883,11 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list, /* sg may merge pages, but we have to separate * per-page addr for GTT */ - for_each_sg(sg_list, sg, sg_len, i) { + for_each_sg(st->sgl, sg, st->nents, i) { len = sg_dma_len(sg) >> PAGE_SHIFT; for (m = 0; m < len; m++) { dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT); - intel_private.driver->write_entry(addr, - j, flags); + intel_private.driver->write_entry(addr, j, flags); j++; } } @@ -905,8 +895,10 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list, } EXPORT_SYMBOL(intel_gtt_insert_sg_entries); -void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries, - struct page **pages, unsigned int flags) +static void intel_gtt_insert_pages(unsigned int first_entry, + unsigned int num_entries, + struct page **pages, + unsigned int flags) { int i, j; @@ -917,7 +909,6 @@ void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries, } readl(intel_private.gtt+j-1); } -EXPORT_SYMBOL(intel_gtt_insert_pages); static int intel_fake_agp_insert_entries(struct agp_memory *mem, off_t pg_start, int type) @@ -953,13 +944,15 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem, global_cache_flush(); if (intel_private.base.needs_dmar) { - ret = intel_gtt_map_memory(mem->pages, mem->page_count, - &mem->sg_list, &mem->num_sg); + struct sg_table st; + + ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st); if (ret != 0) return ret; - intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg, - pg_start, type); + intel_gtt_insert_sg_entries(&st, pg_start, type); + mem->sg_list = st.sgl; + mem->num_sg = st.nents; } else intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages, type); -- cgit v1.2.3 From edef7e685da05c13cce50c0126189c80fe2c8f71 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 14 Sep 2012 11:57:47 +0100 Subject: agp/intel: Use a write-combining map for updating PTEs Rewriting the PTE entries using an WC mapping is roughly an order of magnitude faster than through the uncached mapping. This makes an observable difference on workloads that cycle through large numbers of buffers, for example Chromium using ShmPixmaps where virtually all the CPU time is currently spent rebinding the userptr. v2: Limit the WC mapping to older generations as we have observed that the TLB invalidation on SandyBridge+ is unreliable with WC updates. See i-g-t/tests/gem_gtt_cpu_tlb Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/char/agp/intel-gtt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/char/agp') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 7fa655ac24d8..e01f5eaaec82 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -666,9 +666,14 @@ static int intel_gtt_init(void) gtt_map_size = intel_private.base.gtt_total_entries * 4; - intel_private.gtt = ioremap(intel_private.gtt_bus_addr, - gtt_map_size); - if (!intel_private.gtt) { + intel_private.gtt = NULL; + if (INTEL_GTT_GEN < 6) + intel_private.gtt = ioremap_wc(intel_private.gtt_bus_addr, + gtt_map_size); + if (intel_private.gtt == NULL) + intel_private.gtt = ioremap(intel_private.gtt_bus_addr, + gtt_map_size); + if (intel_private.gtt == NULL) { intel_private.driver->cleanup(); iounmap(intel_private.registers); return -ENOMEM; -- cgit v1.2.3