/*
 * Persistent Memory Driver
 *
 * Copyright (c) 2014-2015, Intel Corporation.
 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <asm/cacheflush.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/badblocks.h>
#include <linux/memremap.h>
#include <linux/vmalloc.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/nd.h>
#include "pfn.h"
#include "nd.h"

struct pmem_device {
	struct request_queue	*pmem_queue;
	struct gendisk		*pmem_disk;

	/* One contiguous memory region per device */
	phys_addr_t		phys_addr;
	/* when non-zero this device is hosting a 'pfn' instance */
	phys_addr_t		data_offset;
	u64			pfn_flags;
	void __pmem		*virt_addr;
	/* immutable base size of the namespace */
	size_t			size;
	/* trim size when namespace capacity has been section aligned */
	u32			pfn_pad;
	struct badblocks	bb;
};

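/*
 * Ask the nvdimm core to clear a poisoned range, drop any fully
 * cleared sectors from the badblocks list, and invalidate stale
 * cached copies of the range.
 */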
static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
		unsigned int len)
{
	struct device *dev = disk_to_dev(pmem->pmem_disk);
	sector_t sector;
	long cleared;

	sector = (offset - pmem->data_offset) / 512;
	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);

	if (cleared > 0 && cleared / 512) {
		dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
				__func__, (unsigned long long) sector,
				cleared / 512, cleared / 512 > 1 ? "s" : "");
		badblocks_clear(&pmem->bb, sector, cleared / 512);
	}
	invalidate_pmem(pmem->virt_addr + offset, len);
}

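/*
 * Copy one bio_vec worth of data between @page and pmem. Reads of
 * known-bad ranges fail with -EIO; writes always reach the media and
 * try to clear any poison they overwrite.
 */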
static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
			unsigned int len, unsigned int off, int rw,
			sector_t sector)
{
	int rc = 0;
	bool bad_pmem = false;
	void *mem = kmap_atomic(page);
	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
		bad_pmem = true;

	if (rw == READ) {
		if (unlikely(bad_pmem))
			rc = -EIO;
		else {
			rc = memcpy_from_pmem(mem + off, pmem_addr, len);
			flush_dcache_page(page);
		}
	} else {
		/*
		 * Note that we write the data both before and after
		 * clearing poison.  The write before clear poison
		 * handles situations where the latest written data is
		 * preserved and the clear poison operation simply marks
		 * the address range as valid without changing the data.
		 * In this case application software can assume that an
		 * interrupted write will either return the new good
		 * data or an error.
		 *
		 * However, if pmem_clear_poison() leaves the data in an
		 * indeterminate state we need to perform the write
		 * after clear poison.
		 */
		flush_dcache_page(page);
		memcpy_to_pmem(pmem_addr, mem + off, len);
		if (unlikely(bad_pmem)) {
			pmem_clear_poison(pmem, pmem_off, len);
			memcpy_to_pmem(pmem_addr, mem + off, len);
		}
	}

	kunmap_atomic(mem);
	return rc;
}

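/*
 * bio submission path: handle the bio one segment at a time, stop at
 * the first failed segment, and order write data to persistence with
 * wmb_pmem() before completing the bio.
 */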
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
	int rc = 0;
	bool do_acct;
	unsigned long start;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct pmem_device *pmem = q->queuedata;

	do_acct = nd_iostat_start(bio, &start);
	bio_for_each_segment(bvec, bio, iter) {
		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
				bvec.bv_offset, bio_data_dir(bio),
				iter.bi_sector);
		if (rc) {
			bio->bi_error = rc;
			break;
		}
	}
	if (do_acct)
		nd_iostat_end(bio, start);

	if (bio_data_dir(bio))
		wmb_pmem();

	bio_endio(bio);
	return BLK_QC_T_NONE;
}

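/*
 * Synchronous single-page I/O, reached via bdev_read_page() and
 * bdev_write_page() rather than through a bio.
 */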
static int pmem_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, int rw)
{
	struct pmem_device *pmem = bdev->bd_queue->queuedata;
	int rc;

	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
	if (rw & WRITE)
		wmb_pmem();

	/*
	 * The ->rw_page interface is subtle and tricky.  The core
	 * retries on any error, so we can only invoke page_endio() in
	 * the successful completion case.  Otherwise, we'll see crashes
	 * caused by double completion.
	 */
	if (rc == 0)
		page_endio(page, rw & WRITE, 0);

	return rc;
}

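/*
 * DAX entry point: translate @sector to a kernel virtual address and
 * pfn, and return how many bytes remain valid past that offset.
 */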
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
		      void __pmem **kaddr, pfn_t *pfn)
{
	struct pmem_device *pmem = bdev->bd_queue->queuedata;
	resource_size_t offset = sector * 512 + pmem->data_offset;

	*kaddr = pmem->virt_addr + offset;
	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);

	return pmem->size - pmem->pfn_pad - offset;
}

static const struct block_device_operations pmem_fops = {
	.owner =		THIS_MODULE,
	.rw_page =		pmem_rw_page,
	.direct_access =	pmem_direct_access,
	.revalidate_disk =	nvdimm_revalidate_disk,
};

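/* devm release actions, run in reverse order of registration on detach */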
static void pmem_release_queue(void *q)
{
	blk_cleanup_queue(q);
}

static void pmem_release_disk(void *disk)
{
	del_gendisk(disk);
	put_disk(disk);
}

static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
		struct resource *res, struct vmem_altmap *altmap);

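/*
 * Attach a pmem block device to the namespace: map the capacity
 * (optionally with a struct page memmap), set up the request queue and
 * gendisk, and populate the badblocks list before the disk goes live.
 */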
static int pmem_attach_disk(struct device *dev,
		struct nd_namespace_common *ndns)
{
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	struct vmem_altmap __altmap, *altmap = NULL;
	struct resource *res = &nsio->res;
	struct nd_pfn *nd_pfn = NULL;
	int nid = dev_to_node(dev);
	struct nd_pfn_sb *pfn_sb;
	struct pmem_device *pmem;
	struct resource pfn_res;
	struct request_queue *q;
	struct gendisk *disk;
	void *addr;

	/* while nsio_rw_bytes is active, parse a pfn info block if present */
	if (is_nd_pfn(dev)) {
		nd_pfn = to_nd_pfn(dev);
		altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap);
		if (IS_ERR(altmap))
			return PTR_ERR(altmap);
	}

	/* we're attaching a block device, disable raw namespace access */
	devm_nsio_disable(dev, nsio);

	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
	if (!pmem)
		return -ENOMEM;

	dev_set_drvdata(dev, pmem);
	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);
	if (!arch_has_wmb_pmem())
		dev_warn(dev, "unable to guarantee persistence of writes\n");

	if (!devm_request_mem_region(dev, res->start, resource_size(res),
				dev_name(dev))) {
		dev_warn(dev, "could not reserve region %pR\n", res);
		return -EBUSY;
	}

	q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
	if (!q)
		return -ENOMEM;
	pmem->pmem_queue = q;

	pmem->pfn_flags = PFN_DEV;
	if (is_nd_pfn(dev)) {
		addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
				altmap);
		pfn_sb = nd_pfn->pfn_sb;
		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
		pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
		pmem->pfn_flags |= PFN_MAP;
		res = &pfn_res; /* for badblocks populate */
		res->start += pmem->data_offset;
	} else if (pmem_should_map_pages(dev)) {
		addr = devm_memremap_pages(dev, &nsio->res,
				&q->q_usage_counter, NULL);
		pmem->pfn_flags |= PFN_MAP;
	} else
		addr = devm_memremap(dev, pmem->phys_addr,
				pmem->size, ARCH_MEMREMAP_PMEM);

	/*
	 * At release time the queue must be dead before
	 * devm_memremap_pages is unwound
	 */
	if (devm_add_action(dev, pmem_release_queue, q)) {
		blk_cleanup_queue(q);
		return -ENOMEM;
	}

	if (IS_ERR(addr))
		return PTR_ERR(addr);
	pmem->virt_addr = (void __pmem *) addr;

	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
	blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
	pmem->pmem_queue->queuedata = pmem;

	disk = alloc_disk_node(0, nid);
	if (!disk)
		return -ENOMEM;
	if (devm_add_action(dev, pmem_release_disk, disk)) {
		put_disk(disk);
		return -ENOMEM;
	}

	disk->fops		= &pmem_fops;
	disk->queue		= pmem->pmem_queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	disk->driverfs_dev = dev;
	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
			/ 512);
	pmem->pmem_disk = disk;
	if (devm_init_badblocks(dev, &pmem->bb))
		return -ENOMEM;
	nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
	disk->bb = &pmem->bb;
	add_disk(disk);
	revalidate_disk(disk);

	return 0;
}

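/*
 * Initialize the pfn info block for a namespace: honor a valid
 * pre-existing info block, otherwise compute the section-alignment
 * padding and memmap reservation and write out a fresh, checksummed
 * nd_pfn_sb.
 */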
static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	u32 start_pad = 0, end_trunc = 0;
	resource_size_t start, size;
	struct nd_namespace_io *nsio;
	struct nd_region *nd_region;
	struct nd_pfn_sb *pfn_sb;
	unsigned long npfns;
	phys_addr_t offset;
	u64 checksum;
	int rc;

	pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
	if (!pfn_sb)
		return -ENOMEM;

	nd_pfn->pfn_sb = pfn_sb;
	rc = nd_pfn_validate(nd_pfn);
	if (rc == -ENODEV)
		/* no info block, do init */;
	else
		return rc;

	nd_region = to_nd_region(nd_pfn->dev.parent);
	if (nd_region->ro) {
		dev_info(&nd_pfn->dev,
				"%s is read-only, unable to init metadata\n",
				dev_name(&nd_region->dev));
		return -ENXIO;
	}

	memset(pfn_sb, 0, sizeof(*pfn_sb));

	/*
	 * Check if pmem collides with 'System RAM' when section aligned and
	 * trim it accordingly
	 */
	nsio = to_nd_namespace_io(&ndns->dev);
	start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
	size = resource_size(&nsio->res);
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED) {
		start = nsio->res.start;
		start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
	}

	start = nsio->res.start;
	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED) {
		size = resource_size(&nsio->res);
		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
	}

	if (start_pad + end_trunc)
		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
				dev_name(&ndns->dev), start_pad + end_trunc);

	/*
	 * Note, we use 64 here for the standard size of struct page,
	 * debugging options may cause it to be larger in which case the
	 * implementation will limit the pfns advertised through
	 * ->direct_access() to those that are included in the memmap.
	 */
	start += start_pad;
	size = resource_size(&nsio->res);
	npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K;
	if (nd_pfn->mode == PFN_MODE_PMEM)
		offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align)
			- start;
	else if (nd_pfn->mode == PFN_MODE_RAM)
		offset = ALIGN(start + SZ_8K, nd_pfn->align) - start;
	else
		return -ENXIO;

	if (offset + start_pad + end_trunc >= size) {
		dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
				dev_name(&ndns->dev));
		return -ENXIO;
	}

	npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
	pfn_sb->dataoff = cpu_to_le64(offset);
	pfn_sb->npfns = cpu_to_le64(npfns);
	memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
	pfn_sb->version_major = cpu_to_le16(1);
	pfn_sb->version_minor = cpu_to_le16(1);
	pfn_sb->start_pad = cpu_to_le32(start_pad);
	pfn_sb->end_trunc = cpu_to_le32(end_trunc);
	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
	pfn_sb->checksum = cpu_to_le64(checksum);

	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
}

/*
 * We hotplug memory at section granularity, pad the reserved area from
 * the previous section base to the namespace base address.
 */
static unsigned long init_altmap_base(resource_size_t base)
{
	unsigned long base_pfn = PHYS_PFN(base);

	return PFN_SECTION_ALIGN_DOWN(base_pfn);
}

static unsigned long init_altmap_reserve(resource_size_t base)
{
	unsigned long reserve = PHYS_PFN(SZ_8K);
	unsigned long base_pfn = PHYS_PFN(base);

	reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
	return reserve;
}

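/*
 * Apply the start_pad/end_trunc adjustments from a validated pfn_sb to
 * @res and, in PFN_MODE_PMEM, prime the vmem_altmap so that struct
 * pages are allocated from the capacity reserved in pmem itself.
 */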
static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
		struct resource *res, struct vmem_altmap *altmap)
{
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	u64 offset = le64_to_cpu(pfn_sb->dataoff);
	u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
	u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	resource_size_t base = nsio->res.start + start_pad;
	struct vmem_altmap __altmap = {
		.base_pfn = init_altmap_base(base),
		.reserve = init_altmap_reserve(base),
	};

	memcpy(res, &nsio->res, sizeof(*res));
	res->start += start_pad;
	res->end -= end_trunc;

	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
	if (nd_pfn->mode == PFN_MODE_RAM) {
		if (offset < SZ_8K)
			return ERR_PTR(-EINVAL);
		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
		altmap = NULL;
	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
		nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE;
		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
			dev_info(&nd_pfn->dev,
					"number of pfns truncated from %lld to %ld\n",
					le64_to_cpu(nd_pfn->pfn_sb->npfns),
					nd_pfn->npfns);
		memcpy(altmap, &__altmap, sizeof(*altmap));
		altmap->free = PHYS_PFN(offset - SZ_8K);
		altmap->alloc = 0;
	} else
		return ERR_PTR(-ENXIO);

	return altmap;
}

/*
 * Determine the effective resource range and vmem_altmap from an nd_pfn
 * instance.
 */
static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
		struct resource *res, struct vmem_altmap *altmap)
{
	int rc;

	if (!nd_pfn->uuid || !nd_pfn->ndns)
		return ERR_PTR(-ENODEV);

	rc = nd_pfn_init(nd_pfn);
	if (rc)
		return ERR_PTR(rc);

	/* we need a valid pfn_sb before we can init a vmem_altmap */
	return __nvdimm_setup_pfn(nd_pfn, res, altmap);
}

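/*
 * If @dev is a btt or pfn instance, attach via its claimed namespace;
 * otherwise probe for an info block that would promote this namespace
 * to one of those personalities before falling back to a raw pmem
 * disk.
 */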
static int nd_pmem_probe(struct device *dev)
{
	struct nd_namespace_common *ndns;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev)))
		return -ENXIO;

	if (is_nd_btt(dev))
		return nvdimm_namespace_attach_btt(ndns);

	if (is_nd_pfn(dev))
		return pmem_attach_disk(dev, ndns);

	/* if we find a valid info-block we'll come back as that personality */
	if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0)
		return -ENXIO;

	/* ...otherwise we're just a raw pmem device */
	return pmem_attach_disk(dev, ndns);
}

static int nd_pmem_remove(struct device *dev)
{
	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev));
	return 0;
}

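/*
 * On NVDIMM_REVALIDATE_POISON, re-populate the badblocks list over the
 * namespace range, adjusted for any pfn data offset and truncation.
 */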
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
	struct nd_region *nd_region = to_nd_region(dev->parent);
	struct pmem_device *pmem = dev_get_drvdata(dev);
	resource_size_t offset = 0, end_trunc = 0;
	struct nd_namespace_common *ndns;
	struct nd_namespace_io *nsio;
	struct resource res;

	if (event != NVDIMM_REVALIDATE_POISON)
		return;

	if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);

		ndns = nd_btt->ndns;
	} else if (is_nd_pfn(dev)) {
		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;

		ndns = nd_pfn->ndns;
		offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad);
		end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
	} else
		ndns = to_ndns(dev);

	nsio = to_nd_namespace_io(&ndns->dev);
	res.start = nsio->res.start + offset;
	res.end = nsio->res.end - end_trunc;
	nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
}

MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
	.probe = nd_pmem_probe,
	.remove = nd_pmem_remove,
	.notify = nd_pmem_notify,
	.drv = {
		.name = "nd_pmem",
	},
	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
};

static int __init pmem_init(void)
{
	return nd_driver_register(&nd_pmem_driver);
}
module_init(pmem_init);

static void pmem_exit(void)
{
	driver_unregister(&nd_pmem_driver.drv);
}
module_exit(pmem_exit);

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");