diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2015-09-03 10:01:44 -0700 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2015-09-03 10:01:44 -0700 |
commit | 01b944fe1cd4e21a2a9ed51adbdbafe2d5e905ba (patch) | |
tree | 85f506cf9fce3d8fd47cf624dd8698472bffc13f /drivers/gpu/drm/nouveau/nvkm/subdev | |
parent | e51e38494a8ecc18650efb0c840600637891de2c (diff) | |
parent | 58f1eae48e5372200553de07b5ecc7574803ee91 (diff) |
Merge branch 'next' into for-linus
Prepare first round of input updates for 4.3 merge window.
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/subdev')
24 files changed, 625 insertions, 184 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c index 1fbd93bbb561..f9d0eb5647fa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c @@ -52,7 +52,7 @@ acpi_read_fast(void *data, u32 offset, u32 length, struct nvkm_bios *bios) u32 start = offset & ~0x00000fff; u32 fetch = limit - start; - if (nvbios_extend(bios, limit) > 0) { + if (nvbios_extend(bios, limit) >= 0) { int ret = nouveau_acpi_get_bios_chunk(bios->data, start, fetch); if (ret == fetch) return fetch; @@ -73,7 +73,7 @@ acpi_read_slow(void *data, u32 offset, u32 length, struct nvkm_bios *bios) u32 start = offset & ~0xfff; u32 fetch = 0; - if (nvbios_extend(bios, limit) > 0) { + if (nvbios_extend(bios, limit) >= 0) { while (start + fetch < limit) { int ret = nouveau_acpi_get_bios_chunk(bios->data, start + fetch, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c index b8853bf16b23..7622b41619a0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c @@ -29,7 +29,7 @@ struct nvkm_hwsq { u32 data; struct { u8 data[512]; - u8 size; + u16 size; } c; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h index 3394a5ea8a9f..ebf709c27e3a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h @@ -11,17 +11,34 @@ struct hwsq { struct hwsq_reg { int sequence; bool force; - u32 addr[2]; + u32 addr; + u32 stride; /* in bytes */ + u32 mask; u32 data; }; static inline struct hwsq_reg +hwsq_stride(u32 addr, u32 stride, u32 mask) +{ + return (struct hwsq_reg) { + .sequence = 0, + .force = 0, + .addr = addr, + .stride = stride, + .mask = mask, + .data = 0xdeadbeef, + }; +} + +static inline struct hwsq_reg hwsq_reg2(u32 addr1, u32 addr2) { return (struct hwsq_reg) { .sequence = 0, .force = 0, - .addr = { addr1, addr2 }, + .addr = addr1, + .stride = addr2 - addr1, + .mask = 0x3, .data = 0xdeadbeef, }; } @@ -29,7 +46,14 @@ hwsq_reg2(u32 addr1, u32 addr2) static inline struct hwsq_reg hwsq_reg(u32 addr) { - return hwsq_reg2(addr, addr); + return (struct hwsq_reg) { + .sequence = 0, + .force = 0, + .addr = addr, + .stride = 0, + .mask = 0x1, + .data = 0xdeadbeef, + }; } static inline int @@ -62,18 +86,24 @@ static inline u32 hwsq_rd32(struct hwsq *ram, struct hwsq_reg *reg) { if (reg->sequence != ram->sequence) - reg->data = nv_rd32(ram->subdev, reg->addr[0]); + reg->data = nv_rd32(ram->subdev, reg->addr); return reg->data; } static inline void hwsq_wr32(struct hwsq *ram, struct hwsq_reg *reg, u32 data) { + u32 mask, off = 0; + reg->sequence = ram->sequence; reg->data = data; - if (reg->addr[0] != reg->addr[1]) - nvkm_hwsq_wr32(ram->hwsq, reg->addr[1], reg->data); - nvkm_hwsq_wr32(ram->hwsq, reg->addr[0], reg->data); + + for (mask = reg->mask; mask > 0; mask = (mask & ~1) >> 1) { + if (mask & 1) + nvkm_hwsq_wr32(ram->hwsq, reg->addr+off, reg->data); + + off += reg->stride; + } } static inline void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c index b24a9cc04b73..39a83d82e0cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c @@ -184,7 +184,7 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei) nv_debug(clk, "setting performance state %d\n", pstatei); clk->pstate = pstatei; - if (pfb->ram->calc) { + if (pfb->ram && pfb->ram->calc) { int khz = pstate->base.domain[nv_clk_src_mem]; do { ret = pfb->ram->calc(pfb, khz); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c index e8778c67578e..c61102f70805 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c @@ -90,12 +90,14 @@ gf100_devinit_disable(struct nvkm_devinit *devinit) return disable; } -static int +int gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { + struct nvkm_devinit_impl *impl = (void *)oclass; struct nv50_devinit_priv *priv; + u64 disable; int ret; ret = nvkm_devinit_create(parent, engine, oclass, &priv); @@ -103,7 +105,8 @@ gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine, if (ret) return ret; - if (nv_rd32(priv, 0x022500) & 0x00000001) + disable = impl->disable(&priv->base); + if (disable & (1ULL << NVDEV_ENGINE_DISP)) priv->base.post = true; return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c index b345a53e881d..87ca0ece37b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c @@ -48,7 +48,7 @@ struct nvkm_oclass * gm107_devinit_oclass = &(struct nvkm_devinit_impl) { .base.handle = NV_SUBDEV(DEVINIT, 0x07), .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_devinit_ctor, + .ctor = gf100_devinit_ctor, .dtor = _nvkm_devinit_dtor, .init = nv50_devinit_init, .fini = _nvkm_devinit_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c index 535172c5f1ad..1076fcf0d716 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c @@ -161,7 +161,7 @@ struct nvkm_oclass * gm204_devinit_oclass = &(struct nvkm_devinit_impl) { .base.handle = NV_SUBDEV(DEVINIT, 0x07), .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_devinit_ctor, + .ctor = gf100_devinit_ctor, .dtor = _nvkm_devinit_dtor, .init = nv50_devinit_init, .fini = _nvkm_devinit_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h index 14a51a9ff7d0..7c63abf11e22 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h @@ -5,7 +5,7 @@ struct nvkm_pll_vals; struct nv04_devinit_priv { struct nvkm_devinit base; - u8 owner; + int owner; }; int nv04_devinit_ctor(struct nvkm_object *, struct nvkm_object *, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h index b882b65ff3cd..9243521c80ac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h @@ -15,6 +15,9 @@ int nv50_devinit_pll_set(struct nvkm_devinit *, u32, u32); int gt215_devinit_pll_set(struct nvkm_devinit *, u32, u32); +int gf100_devinit_ctor(struct nvkm_object *, struct nvkm_object *, + struct nvkm_oclass *, void *, u32, + struct nvkm_object **); int gf100_devinit_pll_set(struct nvkm_devinit *, u32, u32); u64 gm107_devinit_disable(struct nvkm_devinit *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild index 904d601e8a50..d6be4c6c5408 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild @@ -37,7 +37,6 @@ nvkm-y += nvkm/subdev/fb/ramgt215.o nvkm-y += nvkm/subdev/fb/rammcp77.o nvkm-y += nvkm/subdev/fb/ramgf100.o nvkm-y += nvkm/subdev/fb/ramgk104.o -nvkm-y += nvkm/subdev/fb/ramgk20a.o nvkm-y += nvkm/subdev/fb/ramgm107.o nvkm-y += nvkm/subdev/fb/sddr2.o nvkm-y += nvkm/subdev/fb/sddr3.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c index 16589fa613cd..61fde43dab71 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c @@ -55,9 +55,11 @@ _nvkm_fb_fini(struct nvkm_object *object, bool suspend) struct nvkm_fb *pfb = (void *)object; int ret; - ret = nv_ofuncs(pfb->ram)->fini(nv_object(pfb->ram), suspend); - if (ret && suspend) - return ret; + if (pfb->ram) { + ret = nv_ofuncs(pfb->ram)->fini(nv_object(pfb->ram), suspend); + if (ret && suspend) + return ret; + } return nvkm_subdev_fini(&pfb->base, suspend); } @@ -72,9 +74,11 @@ _nvkm_fb_init(struct nvkm_object *object) if (ret) return ret; - ret = nv_ofuncs(pfb->ram)->init(nv_object(pfb->ram)); - if (ret) - return ret; + if (pfb->ram) { + ret = nv_ofuncs(pfb->ram)->init(nv_object(pfb->ram)); + if (ret) + return ret; + } for (i = 0; i < pfb->tile.regions; i++) pfb->tile.prog(pfb, i, &pfb->tile.region[i]); @@ -91,9 +95,12 @@ _nvkm_fb_dtor(struct nvkm_object *object) for (i = 0; i < pfb->tile.regions; i++) pfb->tile.fini(pfb, i, &pfb->tile.region[i]); nvkm_mm_fini(&pfb->tags); - nvkm_mm_fini(&pfb->vram); - nvkm_object_ref(NULL, (struct nvkm_object **)&pfb->ram); + if (pfb->ram) { + nvkm_mm_fini(&pfb->vram); + nvkm_object_ref(NULL, (struct nvkm_object **)&pfb->ram); + } + nvkm_subdev_destroy(&pfb->base); } @@ -127,6 +134,9 @@ nvkm_fb_create_(struct nvkm_object *parent, struct nvkm_object *engine, pfb->memtype_valid = impl->memtype; + if (!impl->ram) + return 0; + ret = nvkm_object_ctor(nv_object(pfb), NULL, impl->ram, NULL, 0, &ram); if (ret) { nv_fatal(pfb, "error detecting memory configuration!!\n"); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c index 6762847c05e8..a5d7857d3898 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c @@ -65,5 +65,4 @@ gk20a_fb_oclass = &(struct nvkm_fb_impl) { .fini = _nvkm_fb_fini, }, .memtype = gf100_fb_memtype_valid, - .ram = &gk20a_ram_oclass, }.base; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h index d82da02daa1f..485c4b64819a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h @@ -32,7 +32,6 @@ extern struct nvkm_oclass gt215_ram_oclass; extern struct nvkm_oclass mcp77_ram_oclass; extern struct nvkm_oclass gf100_ram_oclass; extern struct nvkm_oclass gk104_ram_oclass; -extern struct nvkm_oclass gk20a_ram_oclass; extern struct nvkm_oclass gm107_ram_oclass; int nvkm_sddr2_calc(struct nvkm_ram *ram); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk20a.c deleted file mode 100644 index 5f30db140b47..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk20a.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#include "priv.h" - -#include <core/device.h> - -struct gk20a_mem { - struct nvkm_mem base; - void *cpuaddr; - dma_addr_t handle; -}; -#define to_gk20a_mem(m) container_of(m, struct gk20a_mem, base) - -static void -gk20a_ram_put(struct nvkm_fb *pfb, struct nvkm_mem **pmem) -{ - struct device *dev = nv_device_base(nv_device(pfb)); - struct gk20a_mem *mem = to_gk20a_mem(*pmem); - - *pmem = NULL; - if (unlikely(mem == NULL)) - return; - - if (likely(mem->cpuaddr)) - dma_free_coherent(dev, mem->base.size << PAGE_SHIFT, - mem->cpuaddr, mem->handle); - - kfree(mem->base.pages); - kfree(mem); -} - -static int -gk20a_ram_get(struct nvkm_fb *pfb, u64 size, u32 align, u32 ncmin, - u32 memtype, struct nvkm_mem **pmem) -{ - struct device *dev = nv_device_base(nv_device(pfb)); - struct gk20a_mem *mem; - u32 type = memtype & 0xff; - u32 npages, order; - int i; - - nv_debug(pfb, "%s: size: %llx align: %x, ncmin: %x\n", __func__, size, - align, ncmin); - - npages = size >> PAGE_SHIFT; - if (npages == 0) - npages = 1; - - if (align == 0) - align = PAGE_SIZE; - align >>= PAGE_SHIFT; - - /* round alignment to the next power of 2, if needed */ - order = fls(align); - if ((align & (align - 1)) == 0) - order--; - align = BIT(order); - - /* ensure returned address is correctly aligned */ - npages = max(align, npages); - - mem = kzalloc(sizeof(*mem), GFP_KERNEL); - if (!mem) - return -ENOMEM; - - mem->base.size = npages; - mem->base.memtype = type; - - mem->base.pages = kzalloc(sizeof(dma_addr_t) * npages, GFP_KERNEL); - if (!mem->base.pages) { - kfree(mem); - return -ENOMEM; - } - - *pmem = &mem->base; - - mem->cpuaddr = dma_alloc_coherent(dev, npages << PAGE_SHIFT, - &mem->handle, GFP_KERNEL); - if (!mem->cpuaddr) { - nv_error(pfb, "%s: cannot allocate memory!\n", __func__); - gk20a_ram_put(pfb, pmem); - return -ENOMEM; - } - - align <<= PAGE_SHIFT; - - /* alignment check */ - if (unlikely(mem->handle & (align - 1))) - nv_warn(pfb, "memory not aligned as requested: %pad (0x%x)\n", - &mem->handle, align); - - nv_debug(pfb, "alloc size: 0x%x, align: 0x%x, paddr: %pad, vaddr: %p\n", - npages << PAGE_SHIFT, align, &mem->handle, mem->cpuaddr); - - for (i = 0; i < npages; i++) - mem->base.pages[i] = mem->handle + (PAGE_SIZE * i); - - mem->base.offset = (u64)mem->base.pages[0]; - return 0; -} - -static int -gk20a_ram_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 datasize, - struct nvkm_object **pobject) -{ - struct nvkm_ram *ram; - int ret; - - ret = nvkm_ram_create(parent, engine, oclass, &ram); - *pobject = nv_object(ram); - if (ret) - return ret; - ram->type = NV_MEM_TYPE_STOLEN; - ram->size = get_num_physpages() << PAGE_SHIFT; - - ram->get = gk20a_ram_get; - ram->put = gk20a_ram_put; - return 0; -} - -struct nvkm_oclass -gk20a_ram_oclass = { - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = gk20a_ram_ctor, - .dtor = _nvkm_ram_dtor, - .init = _nvkm_ram_init, - .fini = _nvkm_ram_fini, - }, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gm107.c index ba19158a5912..0b256aa4960f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gm107.c @@ -45,10 +45,8 @@ gm107_fuse_ctor(struct nvkm_object *parent, struct nvkm_object *engine, ret = nvkm_fuse_create(parent, engine, oclass, &priv); *pobject = nv_object(priv); - if (ret) - return ret; - return 0; + return ret; } struct nvkm_oclass diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/Kbuild index e6f35abe7879..13bb7fc0a569 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/Kbuild @@ -2,3 +2,4 @@ nvkm-y += nvkm/subdev/instmem/base.o nvkm-y += nvkm/subdev/instmem/nv04.o nvkm-y += nvkm/subdev/instmem/nv40.o nvkm-y += nvkm/subdev/instmem/nv50.o +nvkm-y += nvkm/subdev/instmem/gk20a.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c new file mode 100644 index 000000000000..dd0994d9ebfc --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -0,0 +1,440 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * GK20A does not have dedicated video memory, and to accurately represent this + * fact Nouveau will not create a RAM device for it. Therefore its instmem + * implementation must be done directly on top of system memory, while providing + * coherent read and write operations. + * + * Instmem can be allocated through two means: + * 1) If an IOMMU mapping has been probed, the IOMMU API is used to make memory + * pages contiguous to the GPU. This is the preferred way. + * 2) If no IOMMU mapping is probed, the DMA API is used to allocate physically + * contiguous memory. + * + * In both cases CPU read and writes are performed using PRAMIN (i.e. using the + * GPU path) to ensure these operations are coherent for the GPU. This allows us + * to use more "relaxed" allocation parameters when using the DMA API, since we + * never need a kernel mapping. + */ + +#include <subdev/fb.h> +#include <core/mm.h> +#include <core/device.h> + +#ifdef __KERNEL__ +#include <linux/dma-attrs.h> +#include <linux/iommu.h> +#include <nouveau_platform.h> +#endif + +#include "priv.h" + +struct gk20a_instobj_priv { + struct nvkm_instobj base; + /* Must be second member here - see nouveau_gpuobj_map_vm() */ + struct nvkm_mem *mem; + /* Pointed by mem */ + struct nvkm_mem _mem; +}; + +/* + * Used for objects allocated using the DMA API + */ +struct gk20a_instobj_dma { + struct gk20a_instobj_priv base; + + void *cpuaddr; + dma_addr_t handle; + struct nvkm_mm_node r; +}; + +/* + * Used for objects flattened using the IOMMU API + */ +struct gk20a_instobj_iommu { + struct gk20a_instobj_priv base; + + /* array of base.mem->size pages */ + struct page *pages[]; +}; + +struct gk20a_instmem_priv { + struct nvkm_instmem base; + spinlock_t lock; + u64 addr; + + /* Only used if IOMMU if present */ + struct mutex *mm_mutex; + struct nvkm_mm *mm; + struct iommu_domain *domain; + unsigned long iommu_pgshift; + + /* Only used by DMA API */ + struct dma_attrs attrs; +}; + +/* + * Use PRAMIN to read/write data and avoid coherency issues. + * PRAMIN uses the GPU path and ensures data will always be coherent. + * + * A dynamic mapping based solution would be desirable in the future, but + * the issue remains of how to maintain coherency efficiently. On ARM it is + * not easy (if possible at all?) to create uncached temporary mappings. + */ + +static u32 +gk20a_instobj_rd32(struct nvkm_object *object, u64 offset) +{ + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(object); + struct gk20a_instobj_priv *node = (void *)object; + unsigned long flags; + u64 base = (node->mem->offset + offset) & 0xffffff00000ULL; + u64 addr = (node->mem->offset + offset) & 0x000000fffffULL; + u32 data; + + spin_lock_irqsave(&priv->lock, flags); + if (unlikely(priv->addr != base)) { + nv_wr32(priv, 0x001700, base >> 16); + priv->addr = base; + } + data = nv_rd32(priv, 0x700000 + addr); + spin_unlock_irqrestore(&priv->lock, flags); + return data; +} + +static void +gk20a_instobj_wr32(struct nvkm_object *object, u64 offset, u32 data) +{ + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(object); + struct gk20a_instobj_priv *node = (void *)object; + unsigned long flags; + u64 base = (node->mem->offset + offset) & 0xffffff00000ULL; + u64 addr = (node->mem->offset + offset) & 0x000000fffffULL; + + spin_lock_irqsave(&priv->lock, flags); + if (unlikely(priv->addr != base)) { + nv_wr32(priv, 0x001700, base >> 16); + priv->addr = base; + } + nv_wr32(priv, 0x700000 + addr, data); + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void +gk20a_instobj_dtor_dma(struct gk20a_instobj_priv *_node) +{ + struct gk20a_instobj_dma *node = (void *)_node; + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(node); + struct device *dev = nv_device_base(nv_device(priv)); + + if (unlikely(!node->cpuaddr)) + return; + + dma_free_attrs(dev, _node->mem->size << PAGE_SHIFT, node->cpuaddr, + node->handle, &priv->attrs); +} + +static void +gk20a_instobj_dtor_iommu(struct gk20a_instobj_priv *_node) +{ + struct gk20a_instobj_iommu *node = (void *)_node; + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(node); + struct nvkm_mm_node *r; + int i; + + if (unlikely(list_empty(&_node->mem->regions))) + return; + + r = list_first_entry(&_node->mem->regions, struct nvkm_mm_node, + rl_entry); + + /* clear bit 34 to unmap pages */ + r->offset &= ~BIT(34 - priv->iommu_pgshift); + + /* Unmap pages from GPU address space and free them */ + for (i = 0; i < _node->mem->size; i++) { + iommu_unmap(priv->domain, + (r->offset + i) << priv->iommu_pgshift, PAGE_SIZE); + __free_page(node->pages[i]); + } + + /* Release area from GPU address space */ + mutex_lock(priv->mm_mutex); + nvkm_mm_free(priv->mm, &r); + mutex_unlock(priv->mm_mutex); +} + +static void +gk20a_instobj_dtor(struct nvkm_object *object) +{ + struct gk20a_instobj_priv *node = (void *)object; + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(node); + + if (priv->domain) + gk20a_instobj_dtor_iommu(node); + else + gk20a_instobj_dtor_dma(node); + + nvkm_instobj_destroy(&node->base); +} + +static int +gk20a_instobj_ctor_dma(struct nvkm_object *parent, struct nvkm_object *engine, + struct nvkm_oclass *oclass, u32 npages, u32 align, + struct gk20a_instobj_priv **_node) +{ + struct gk20a_instobj_dma *node; + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(parent); + struct device *dev = nv_device_base(nv_device(parent)); + int ret; + + ret = nvkm_instobj_create_(parent, engine, oclass, sizeof(*node), + (void **)&node); + *_node = &node->base; + if (ret) + return ret; + + node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, + &node->handle, GFP_KERNEL, + &priv->attrs); + if (!node->cpuaddr) { + nv_error(priv, "cannot allocate DMA memory\n"); + return -ENOMEM; + } + + /* alignment check */ + if (unlikely(node->handle & (align - 1))) + nv_warn(priv, "memory not aligned as requested: %pad (0x%x)\n", + &node->handle, align); + + /* present memory for being mapped using small pages */ + node->r.type = 12; + node->r.offset = node->handle >> 12; + node->r.length = (npages << PAGE_SHIFT) >> 12; + + node->base._mem.offset = node->handle; + + INIT_LIST_HEAD(&node->base._mem.regions); + list_add_tail(&node->r.rl_entry, &node->base._mem.regions); + + return 0; +} + +static int +gk20a_instobj_ctor_iommu(struct nvkm_object *parent, struct nvkm_object *engine, + struct nvkm_oclass *oclass, u32 npages, u32 align, + struct gk20a_instobj_priv **_node) +{ + struct gk20a_instobj_iommu *node; + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(parent); + struct nvkm_mm_node *r; + int ret; + int i; + + ret = nvkm_instobj_create_(parent, engine, oclass, + sizeof(*node) + sizeof(node->pages[0]) * npages, + (void **)&node); + *_node = &node->base; + if (ret) + return ret; + + /* Allocate backing memory */ + for (i = 0; i < npages; i++) { + struct page *p = alloc_page(GFP_KERNEL); + + if (p == NULL) { + ret = -ENOMEM; + goto free_pages; + } + node->pages[i] = p; + } + + mutex_lock(priv->mm_mutex); + /* Reserve area from GPU address space */ + ret = nvkm_mm_head(priv->mm, 0, 1, npages, npages, + align >> priv->iommu_pgshift, &r); + mutex_unlock(priv->mm_mutex); + if (ret) { + nv_error(priv, "virtual space is full!\n"); + goto free_pages; + } + + /* Map into GPU address space */ + for (i = 0; i < npages; i++) { + struct page *p = node->pages[i]; + u32 offset = (r->offset + i) << priv->iommu_pgshift; + + ret = iommu_map(priv->domain, offset, page_to_phys(p), + PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); + if (ret < 0) { + nv_error(priv, "IOMMU mapping failure: %d\n", ret); + + while (i-- > 0) { + offset -= PAGE_SIZE; + iommu_unmap(priv->domain, offset, PAGE_SIZE); + } + goto release_area; + } + } + + /* Bit 34 tells that an address is to be resolved through the IOMMU */ + r->offset |= BIT(34 - priv->iommu_pgshift); + + node->base._mem.offset = ((u64)r->offset) << priv->iommu_pgshift; + + INIT_LIST_HEAD(&node->base._mem.regions); + list_add_tail(&r->rl_entry, &node->base._mem.regions); + + return 0; + +release_area: + mutex_lock(priv->mm_mutex); + nvkm_mm_free(priv->mm, &r); + mutex_unlock(priv->mm_mutex); + +free_pages: + for (i = 0; i < npages && node->pages[i] != NULL; i++) + __free_page(node->pages[i]); + + return ret; +} + +static int +gk20a_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, + struct nvkm_oclass *oclass, void *data, u32 _size, + struct nvkm_object **pobject) +{ + struct nvkm_instobj_args *args = data; + struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(parent); + struct gk20a_instobj_priv *node; + u32 size, align; + int ret; + + nv_debug(parent, "%s (%s): size: %x align: %x\n", __func__, + priv->domain ? "IOMMU" : "DMA", args->size, args->align); + + /* Round size and align to page bounds */ + size = max(roundup(args->size, PAGE_SIZE), PAGE_SIZE); + align = max(roundup(args->align, PAGE_SIZE), PAGE_SIZE); + + if (priv->domain) + ret = gk20a_instobj_ctor_iommu(parent, engine, oclass, + size >> PAGE_SHIFT, align, &node); + else + ret = gk20a_instobj_ctor_dma(parent, engine, oclass, + size >> PAGE_SHIFT, align, &node); + *pobject = nv_object(node); + if (ret) + return ret; + + node->mem = &node->_mem; + + /* present memory for being mapped using small pages */ + node->mem->size = size >> 12; + node->mem->memtype = 0; + node->mem->page_shift = 12; + + node->base.addr = node->mem->offset; + node->base.size = size; + + nv_debug(parent, "alloc size: 0x%x, align: 0x%x, gaddr: 0x%llx\n", + size, align, node->mem->offset); + + return 0; +} + +static struct nvkm_instobj_impl +gk20a_instobj_oclass = { + .base.ofuncs = &(struct nvkm_ofuncs) { + .ctor = gk20a_instobj_ctor, + .dtor = gk20a_instobj_dtor, + .init = _nvkm_instobj_init, + .fini = _nvkm_instobj_fini, + .rd32 = gk20a_instobj_rd32, + .wr32 = gk20a_instobj_wr32, + }, +}; + + + +static int +gk20a_instmem_fini(struct nvkm_object *object, bool suspend) +{ + struct gk20a_instmem_priv *priv = (void *)object; + priv->addr = ~0ULL; + return nvkm_instmem_fini(&priv->base, suspend); +} + +static int +gk20a_instmem_ctor(struct nvkm_object *parent, struct nvkm_object *engine, + struct nvkm_oclass *oclass, void *data, u32 size, + struct nvkm_object **pobject) +{ + struct gk20a_instmem_priv *priv; + struct nouveau_platform_device *plat; + int ret; + + ret = nvkm_instmem_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); + if (ret) + return ret; + + spin_lock_init(&priv->lock); + + plat = nv_device_to_platform(nv_device(parent)); + if (plat->gpu->iommu.domain) { + priv->domain = plat->gpu->iommu.domain; + priv->mm = plat->gpu->iommu.mm; + priv->iommu_pgshift = plat->gpu->iommu.pgshift; + priv->mm_mutex = &plat->gpu->iommu.mutex; + + nv_info(priv, "using IOMMU\n"); + } else { + init_dma_attrs(&priv->attrs); + /* + * We will access instmem through PRAMIN and thus do not need a + * consistent CPU pointer or kernel mapping + */ + dma_set_attr(DMA_ATTR_NON_CONSISTENT, &priv->attrs); + dma_set_attr(DMA_ATTR_WEAK_ORDERING, &priv->attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &priv->attrs); + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &priv->attrs); + + nv_info(priv, "using DMA API\n"); + } + + return 0; +} + +struct nvkm_oclass * +gk20a_instmem_oclass = &(struct nvkm_instmem_impl) { + .base.handle = NV_SUBDEV(INSTMEM, 0xea), + .base.ofuncs = &(struct nvkm_ofuncs) { + .ctor = gk20a_instmem_ctor, + .dtor = _nvkm_instmem_dtor, + .init = _nvkm_instmem_init, + .fini = gk20a_instmem_fini, + }, + .instobj = &gk20a_instobj_oclass.base, +}.base; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c index 8e7cc6200d60..7fb5ea0314cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c @@ -136,7 +136,8 @@ gf100_ltc_dtor(struct nvkm_object *object) struct nvkm_ltc_priv *priv = (void *)object; nvkm_mm_fini(&priv->tags); - nvkm_mm_free(&pfb->vram, &priv->tag_ram); + if (pfb->ram) + nvkm_mm_free(&pfb->vram, &priv->tag_ram); nvkm_ltc_destroy(priv); } @@ -149,6 +150,12 @@ gf100_ltc_init_tag_ram(struct nvkm_fb *pfb, struct nvkm_ltc_priv *priv) u32 tag_size, tag_margin, tag_align; int ret; + /* No VRAM, no tags for now. */ + if (!pfb->ram) { + priv->num_tags = 0; + goto mm_init; + } + /* tags for 1/4 of VRAM should be enough (8192/4 per GiB of VRAM) */ priv->num_tags = (pfb->ram->size >> 17) / 4; if (priv->num_tags > (1 << 17)) @@ -183,6 +190,7 @@ gf100_ltc_init_tag_ram(struct nvkm_fb *pfb, struct nvkm_ltc_priv *priv) priv->tag_base = tag_base; } +mm_init: ret = nvkm_mm_init(&priv->tags, 0, priv->num_tags, 1); return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c index 42cac13ca629..f20e4ca87e17 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c @@ -182,7 +182,7 @@ mxm_show_unmatched(struct nvkm_mxm *mxm, u8 *data, void *info) { u64 desc = *(u64 *)data; if ((desc & 0xf0) != 0xf0) - nv_info(mxm, "unmatched output device 0x%016llx\n", desc); + nv_info(mxm, "unmatched output device 0x%016llx\n", desc); return true; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild index 9a150d520225..7081d6a9b95f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild @@ -4,5 +4,6 @@ nvkm-y += nvkm/subdev/pmu/gt215.o nvkm-y += nvkm/subdev/pmu/gf100.o nvkm-y += nvkm/subdev/pmu/gf110.o nvkm-y += nvkm/subdev/pmu/gk104.o +nvkm-y += nvkm/subdev/pmu/gk110.o nvkm-y += nvkm/subdev/pmu/gk208.o nvkm-y += nvkm/subdev/pmu/gk20a.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c new file mode 100644 index 000000000000..89bb94b0af8b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c @@ -0,0 +1,95 @@ +/* + * Copyright 2015 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#define gf110_pmu_code gk110_pmu_code +#define gf110_pmu_data gk110_pmu_data +#include "priv.h" +#include "fuc/gf110.fuc4.h" + +#include <subdev/timer.h> + +void +gk110_pmu_pgob(struct nvkm_pmu *pmu, bool enable) +{ + static const struct { + u32 addr; + u32 data; + } magic[] = { + { 0x020520, 0xfffffffc }, + { 0x020524, 0xfffffffe }, + { 0x020524, 0xfffffffc }, + { 0x020524, 0xfffffff8 }, + { 0x020524, 0xffffffe0 }, + { 0x020530, 0xfffffffe }, + { 0x02052c, 0xfffffffa }, + { 0x02052c, 0xfffffff0 }, + { 0x02052c, 0xffffffc0 }, + { 0x02052c, 0xffffff00 }, + { 0x02052c, 0xfffffc00 }, + { 0x02052c, 0xfffcfc00 }, + { 0x02052c, 0xfff0fc00 }, + { 0x02052c, 0xff80fc00 }, + { 0x020528, 0xfffffffe }, + { 0x020528, 0xfffffffc }, + }; + int i; + + nv_mask(pmu, 0x000200, 0x00001000, 0x00000000); + nv_rd32(pmu, 0x000200); + nv_mask(pmu, 0x000200, 0x08000000, 0x08000000); + msleep(50); + + nv_mask(pmu, 0x10a78c, 0x00000002, 0x00000002); + nv_mask(pmu, 0x10a78c, 0x00000001, 0x00000001); + nv_mask(pmu, 0x10a78c, 0x00000001, 0x00000000); + + nv_mask(pmu, 0x0206b4, 0x00000000, 0x00000000); + for (i = 0; i < ARRAY_SIZE(magic); i++) { + nv_wr32(pmu, magic[i].addr, magic[i].data); + nv_wait(pmu, magic[i].addr, 0x80000000, 0x00000000); + } + + nv_mask(pmu, 0x10a78c, 0x00000002, 0x00000000); + nv_mask(pmu, 0x10a78c, 0x00000001, 0x00000001); + nv_mask(pmu, 0x10a78c, 0x00000001, 0x00000000); + + nv_mask(pmu, 0x000200, 0x08000000, 0x00000000); + nv_mask(pmu, 0x000200, 0x00001000, 0x00001000); + nv_rd32(pmu, 0x000200); +} + +struct nvkm_oclass * +gk110_pmu_oclass = &(struct nvkm_pmu_impl) { + .base.handle = NV_SUBDEV(PMU, 0xf0), + .base.ofuncs = &(struct nvkm_ofuncs) { + .ctor = _nvkm_pmu_ctor, + .dtor = _nvkm_pmu_dtor, + .init = _nvkm_pmu_init, + .fini = _nvkm_pmu_fini, + }, + .code.data = gk110_pmu_code, + .code.size = sizeof(gk110_pmu_code), + .data.data = gk110_pmu_data, + .data.size = sizeof(gk110_pmu_data), + .pgob = gk110_pmu_pgob, +}.base; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c index 6f9c09af1a49..b14134ef9ea5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c @@ -37,4 +37,5 @@ gk208_pmu_oclass = &(struct nvkm_pmu_impl) { .code.size = sizeof(gk208_pmu_code), .data.data = gk208_pmu_data, .data.size = sizeof(gk208_pmu_data), + .pgob = gk110_pmu_pgob, }.base; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c index a49934bbe637..594f746e68f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c @@ -159,7 +159,7 @@ resched: nvkm_timer_alarm(priv, 100000000, alarm); } -int +static int gk20a_pmu_fini(struct nvkm_object *object, bool suspend) { struct nvkm_pmu *pmu = (void *)object; @@ -170,7 +170,7 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend) return nvkm_subdev_fini(&pmu->base, suspend); } -int +static int gk20a_pmu_init(struct nvkm_object *object) { struct nvkm_pmu *pmu = (void *)object; @@ -192,7 +192,8 @@ gk20a_pmu_init(struct nvkm_object *object) return ret; } -struct gk20a_pmu_dvfs_data gk20a_dvfs_data= { +static struct gk20a_pmu_dvfs_data +gk20a_dvfs_data= { .p_load_target = 70, .p_load_max = 90, .p_smooth = 1, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index 998410563bfd..799e7c8b88f5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -40,4 +40,6 @@ struct nvkm_pmu_impl { void (*pgob)(struct nvkm_pmu *, bool); }; + +void gk110_pmu_pgob(struct nvkm_pmu *, bool); #endif |