blob: 8bd645a1913ba70821b81739532d3045457e12db [file] [log] [blame]
Tom Haynesf54bcf22014-12-11 15:34:59 -05001/*
2 * Common NFS I/O operations for the pnfs file based
3 * layout drivers.
4 *
5 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
6 *
7 * Tom Haynes <loghyr@primarydata.com>
8 */
9
10#include <linux/nfs_fs.h>
11#include <linux/nfs_page.h>
Peng Tao6b7f3cf2014-05-29 21:06:59 +080012#include <linux/sunrpc/addr.h>
Peng Tao5f01d952014-05-30 18:15:59 +080013#include <linux/module.h>
Tom Haynesf54bcf22014-12-11 15:34:59 -050014
Peng Tao7405f9e2014-05-29 21:06:58 +080015#include "nfs4session.h"
Tom Haynesf54bcf22014-12-11 15:34:59 -050016#include "internal.h"
17#include "pnfs.h"
18
Peng Tao875ae062014-05-29 21:06:57 +080019#define NFSDBG_FACILITY NFSDBG_PNFS
20
Tom Haynesf54bcf22014-12-11 15:34:59 -050021void pnfs_generic_rw_release(void *data)
22{
23 struct nfs_pgio_header *hdr = data;
Tom Haynesf54bcf22014-12-11 15:34:59 -050024
Tom Haynesf54bcf22014-12-11 15:34:59 -050025 nfs_put_client(hdr->ds_clp);
26 hdr->mds_ops->rpc_release(data);
27}
28EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
29
30/* Fake up some data that will cause nfs_commit_release to retry the writes. */
31void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
32{
33 struct nfs_page *first = nfs_list_entry(data->pages.next);
34
35 data->task.tk_status = 0;
36 memcpy(&data->verf.verifier, &first->wb_verf,
37 sizeof(data->verf.verifier));
38 data->verf.verifier.data[0]++; /* ensure verifier mismatch */
39}
40EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
41
42void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
43{
44 struct nfs_commit_data *wdata = data;
45
46 /* Note this may cause RPC to be resent */
47 wdata->mds_ops->rpc_call_done(task, data);
48}
49EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);
50
51void pnfs_generic_commit_release(void *calldata)
52{
53 struct nfs_commit_data *data = calldata;
54
55 data->completion_ops->completion(data);
56 pnfs_put_lseg(data->lseg);
57 nfs_put_client(data->ds_clp);
58 nfs_commitdata_release(data);
59}
60EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
61
62/* The generic layer is about to remove the req from the commit list.
63 * If this will make the bucket empty, it will need to put the lseg reference.
Tom Haynes085d1e32014-12-11 13:04:55 -050064 * Note this must be called holding the inode (/cinfo) lock
Tom Haynesf54bcf22014-12-11 15:34:59 -050065 */
66void
67pnfs_generic_clear_request_commit(struct nfs_page *req,
68 struct nfs_commit_info *cinfo)
69{
70 struct pnfs_layout_segment *freeme = NULL;
71
72 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
73 goto out;
74 cinfo->ds->nwritten--;
75 if (list_is_singular(&req->wb_list)) {
76 struct pnfs_commit_bucket *bucket;
77
78 bucket = list_first_entry(&req->wb_list,
79 struct pnfs_commit_bucket,
80 written);
81 freeme = bucket->wlseg;
82 bucket->wlseg = NULL;
83 }
84out:
85 nfs_request_remove_commit_list(req, cinfo);
86 pnfs_put_lseg_locked(freeme);
87}
88EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
89
90static int
91pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst,
92 struct nfs_commit_info *cinfo, int max)
93{
94 struct nfs_page *req, *tmp;
95 int ret = 0;
96
97 list_for_each_entry_safe(req, tmp, src, wb_list) {
98 if (!nfs_lock_request(req))
99 continue;
100 kref_get(&req->wb_kref);
101 if (cond_resched_lock(cinfo->lock))
102 list_safe_reset_next(req, tmp, wb_list);
103 nfs_request_remove_commit_list(req, cinfo);
104 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
105 nfs_list_add_request(req, dst);
106 ret++;
107 if ((ret == max) && !cinfo->dreq)
108 break;
109 }
110 return ret;
111}
112
Tom Haynesf54bcf22014-12-11 15:34:59 -0500113static int
114pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
115 struct nfs_commit_info *cinfo,
116 int max)
117{
118 struct list_head *src = &bucket->written;
119 struct list_head *dst = &bucket->committing;
120 int ret;
121
Tom Haynes085d1e32014-12-11 13:04:55 -0500122 lockdep_assert_held(cinfo->lock);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500123 ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max);
124 if (ret) {
125 cinfo->ds->nwritten -= ret;
126 cinfo->ds->ncommitting += ret;
Trond Myklebust94d06a42015-08-03 17:38:33 -0400127 if (bucket->clseg == NULL)
128 bucket->clseg = pnfs_get_lseg(bucket->wlseg);
129 if (list_empty(src)) {
130 pnfs_put_lseg_locked(bucket->wlseg);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500131 bucket->wlseg = NULL;
Trond Myklebust94d06a42015-08-03 17:38:33 -0400132 }
Tom Haynesf54bcf22014-12-11 15:34:59 -0500133 }
134 return ret;
135}
136
Tom Haynes085d1e32014-12-11 13:04:55 -0500137/* Move reqs from written to committing lists, returning count
138 * of number moved.
Tom Haynesf54bcf22014-12-11 15:34:59 -0500139 */
140int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo,
141 int max)
142{
143 int i, rv = 0, cnt;
144
Tom Haynes085d1e32014-12-11 13:04:55 -0500145 lockdep_assert_held(cinfo->lock);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500146 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
147 cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i],
148 cinfo, max);
149 max -= cnt;
150 rv += cnt;
151 }
152 return rv;
153}
154EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
155
Tom Haynes085d1e32014-12-11 13:04:55 -0500156/* Pull everything off the committing lists and dump into @dst. */
Tom Haynesf54bcf22014-12-11 15:34:59 -0500157void pnfs_generic_recover_commit_reqs(struct list_head *dst,
158 struct nfs_commit_info *cinfo)
159{
160 struct pnfs_commit_bucket *b;
161 struct pnfs_layout_segment *freeme;
162 int i;
163
Tom Haynes085d1e32014-12-11 13:04:55 -0500164 lockdep_assert_held(cinfo->lock);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500165restart:
Tom Haynesf54bcf22014-12-11 15:34:59 -0500166 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
167 if (pnfs_generic_transfer_commit_list(&b->written, dst,
168 cinfo, 0)) {
169 freeme = b->wlseg;
170 b->wlseg = NULL;
171 spin_unlock(cinfo->lock);
172 pnfs_put_lseg(freeme);
Tom Haynes085d1e32014-12-11 13:04:55 -0500173 spin_lock(cinfo->lock);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500174 goto restart;
175 }
176 }
177 cinfo->ds->nwritten = 0;
Tom Haynesf54bcf22014-12-11 15:34:59 -0500178}
179EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
180
181static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
182{
183 struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
184 struct pnfs_commit_bucket *bucket;
185 struct pnfs_layout_segment *freeme;
Trond Myklebust94d06a42015-08-03 17:38:33 -0400186 LIST_HEAD(pages);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500187 int i;
188
Trond Myklebust94d06a42015-08-03 17:38:33 -0400189 spin_lock(cinfo->lock);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500190 for (i = idx; i < fl_cinfo->nbuckets; i++) {
191 bucket = &fl_cinfo->buckets[i];
192 if (list_empty(&bucket->committing))
193 continue;
Tom Haynesf54bcf22014-12-11 15:34:59 -0500194 freeme = bucket->clseg;
195 bucket->clseg = NULL;
Trond Myklebust94d06a42015-08-03 17:38:33 -0400196 list_splice_init(&bucket->committing, &pages);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500197 spin_unlock(cinfo->lock);
Trond Myklebust94d06a42015-08-03 17:38:33 -0400198 nfs_retry_commit(&pages, freeme, cinfo, i);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500199 pnfs_put_lseg(freeme);
Trond Myklebust94d06a42015-08-03 17:38:33 -0400200 spin_lock(cinfo->lock);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500201 }
Trond Myklebust94d06a42015-08-03 17:38:33 -0400202 spin_unlock(cinfo->lock);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500203}
204
205static unsigned int
206pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
207 struct list_head *list)
208{
209 struct pnfs_ds_commit_info *fl_cinfo;
210 struct pnfs_commit_bucket *bucket;
211 struct nfs_commit_data *data;
212 int i;
213 unsigned int nreq = 0;
214
215 fl_cinfo = cinfo->ds;
216 bucket = fl_cinfo->buckets;
217 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
218 if (list_empty(&bucket->committing))
219 continue;
220 data = nfs_commitdata_alloc();
221 if (!data)
222 break;
223 data->ds_commit_index = i;
Tom Haynesf54bcf22014-12-11 15:34:59 -0500224 list_add(&data->pages, list);
225 nreq++;
226 }
227
228 /* Clean up on error */
229 pnfs_generic_retry_commit(cinfo, i);
230 return nreq;
231}
232
Trond Myklebust94d06a42015-08-03 17:38:33 -0400233static inline
234void pnfs_fetch_commit_bucket_list(struct list_head *pages,
235 struct nfs_commit_data *data,
236 struct nfs_commit_info *cinfo)
237{
238 struct pnfs_commit_bucket *bucket;
239
240 bucket = &cinfo->ds->buckets[data->ds_commit_index];
241 spin_lock(cinfo->lock);
242 list_splice_init(pages, &bucket->committing);
243 data->lseg = bucket->clseg;
244 bucket->clseg = NULL;
245 spin_unlock(cinfo->lock);
246
247}
248
Weston Andros Adamson691c5072016-05-25 10:07:23 -0400249/* Helper function for pnfs_generic_commit_pagelist to catch an empty
250 * page list. This can happen when two commits race. */
251static bool
252pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
253 struct nfs_commit_data *data,
254 struct nfs_commit_info *cinfo)
255{
256 if (list_empty(pages)) {
257 if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
258 wake_up_atomic_t(&cinfo->mds->rpcs_out);
259 nfs_commitdata_release(data);
260 return true;
261 }
262
263 return false;
264}
265
Tom Haynesf54bcf22014-12-11 15:34:59 -0500266/* This follows nfs_commit_list pretty closely */
267int
268pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
269 int how, struct nfs_commit_info *cinfo,
270 int (*initiate_commit)(struct nfs_commit_data *data,
271 int how))
272{
273 struct nfs_commit_data *data, *tmp;
274 LIST_HEAD(list);
275 unsigned int nreq = 0;
276
277 if (!list_empty(mds_pages)) {
278 data = nfs_commitdata_alloc();
279 if (data != NULL) {
Trond Myklebust94d06a42015-08-03 17:38:33 -0400280 data->ds_commit_index = -1;
Tom Haynesf54bcf22014-12-11 15:34:59 -0500281 list_add(&data->pages, &list);
282 nreq++;
283 } else {
Weston Andros Adamsonb57ff132014-09-05 18:20:21 -0400284 nfs_retry_commit(mds_pages, NULL, cinfo, 0);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500285 pnfs_generic_retry_commit(cinfo, 0);
286 cinfo->completion_ops->error_cleanup(NFS_I(inode));
287 return -ENOMEM;
288 }
289 }
290
291 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
292
293 if (nreq == 0) {
294 cinfo->completion_ops->error_cleanup(NFS_I(inode));
295 goto out;
296 }
297
298 atomic_add(nreq, &cinfo->mds->rpcs_out);
299
300 list_for_each_entry_safe(data, tmp, &list, pages) {
301 list_del_init(&data->pages);
Trond Myklebust94d06a42015-08-03 17:38:33 -0400302 if (data->ds_commit_index < 0) {
Weston Andros Adamson691c5072016-05-25 10:07:23 -0400303 /* another commit raced with us */
304 if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages,
305 data, cinfo))
306 continue;
307
Tom Haynesf54bcf22014-12-11 15:34:59 -0500308 nfs_init_commit(data, mds_pages, NULL, cinfo);
309 nfs_initiate_commit(NFS_CLIENT(inode), data,
Peng Taoc36aae92014-06-09 07:10:14 +0800310 NFS_PROTO(data->inode),
Tom Haynesf54bcf22014-12-11 15:34:59 -0500311 data->mds_ops, how, 0);
312 } else {
Trond Myklebust94d06a42015-08-03 17:38:33 -0400313 LIST_HEAD(pages);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500314
Trond Myklebust94d06a42015-08-03 17:38:33 -0400315 pnfs_fetch_commit_bucket_list(&pages, data, cinfo);
Weston Andros Adamson691c5072016-05-25 10:07:23 -0400316
317 /* another commit raced with us */
318 if (pnfs_generic_commit_cancel_empty_pagelist(&pages,
319 data, cinfo))
320 continue;
321
Trond Myklebust94d06a42015-08-03 17:38:33 -0400322 nfs_init_commit(data, &pages, data->lseg, cinfo);
Tom Haynesf54bcf22014-12-11 15:34:59 -0500323 initiate_commit(data, how);
324 }
325 }
326out:
327 cinfo->ds->ncommitting = 0;
328 return PNFS_ATTEMPTED;
329}
330EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
Peng Tao875ae062014-05-29 21:06:57 +0800331
332/*
333 * Data server cache
334 *
335 * Data servers can be mapped to different device ids.
336 * nfs4_pnfs_ds reference counting
337 * - set to 1 on allocation
338 * - incremented when a device id maps a data server already in the cache.
339 * - decremented when deviceid is removed from the cache.
340 */
341static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
342static LIST_HEAD(nfs4_data_server_cache);
343
344/* Debug routines */
345static void
346print_ds(struct nfs4_pnfs_ds *ds)
347{
348 if (ds == NULL) {
349 printk(KERN_WARNING "%s NULL device\n", __func__);
350 return;
351 }
352 printk(KERN_WARNING " ds %s\n"
353 " ref count %d\n"
354 " client %p\n"
355 " cl_exchange_flags %x\n",
356 ds->ds_remotestr,
357 atomic_read(&ds->ds_count), ds->ds_clp,
358 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
359}
360
361static bool
362same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
363{
364 struct sockaddr_in *a, *b;
365 struct sockaddr_in6 *a6, *b6;
366
367 if (addr1->sa_family != addr2->sa_family)
368 return false;
369
370 switch (addr1->sa_family) {
371 case AF_INET:
372 a = (struct sockaddr_in *)addr1;
373 b = (struct sockaddr_in *)addr2;
374
375 if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
376 a->sin_port == b->sin_port)
377 return true;
378 break;
379
380 case AF_INET6:
381 a6 = (struct sockaddr_in6 *)addr1;
382 b6 = (struct sockaddr_in6 *)addr2;
383
384 /* LINKLOCAL addresses must have matching scope_id */
385 if (ipv6_addr_src_scope(&a6->sin6_addr) ==
386 IPV6_ADDR_SCOPE_LINKLOCAL &&
387 a6->sin6_scope_id != b6->sin6_scope_id)
388 return false;
389
390 if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
391 a6->sin6_port == b6->sin6_port)
392 return true;
393 break;
394
395 default:
396 dprintk("%s: unhandled address family: %u\n",
397 __func__, addr1->sa_family);
398 return false;
399 }
400
401 return false;
402}
403
Trond Myklebust0bdce6a82015-08-13 10:59:07 -0400404/*
405 * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
406 * declare a match.
407 */
Peng Tao875ae062014-05-29 21:06:57 +0800408static bool
409_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
410 const struct list_head *dsaddrs2)
411{
412 struct nfs4_pnfs_ds_addr *da1, *da2;
Trond Myklebust0bdce6a82015-08-13 10:59:07 -0400413 struct sockaddr *sa1, *sa2;
414 bool match = false;
Peng Tao875ae062014-05-29 21:06:57 +0800415
Trond Myklebust0bdce6a82015-08-13 10:59:07 -0400416 list_for_each_entry(da1, dsaddrs1, da_node) {
417 sa1 = (struct sockaddr *)&da1->da_addr;
418 match = false;
419 list_for_each_entry(da2, dsaddrs2, da_node) {
420 sa2 = (struct sockaddr *)&da2->da_addr;
421 match = same_sockaddr(sa1, sa2);
422 if (match)
423 break;
424 }
425 if (!match)
426 break;
Peng Tao875ae062014-05-29 21:06:57 +0800427 }
Trond Myklebust0bdce6a82015-08-13 10:59:07 -0400428 return match;
Peng Tao875ae062014-05-29 21:06:57 +0800429}
430
431/*
432 * Lookup DS by addresses. nfs4_ds_cache_lock is held
433 */
434static struct nfs4_pnfs_ds *
435_data_server_lookup_locked(const struct list_head *dsaddrs)
436{
437 struct nfs4_pnfs_ds *ds;
438
439 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
440 if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
441 return ds;
442 return NULL;
443}
444
445static void destroy_ds(struct nfs4_pnfs_ds *ds)
446{
447 struct nfs4_pnfs_ds_addr *da;
448
449 dprintk("--> %s\n", __func__);
450 ifdebug(FACILITY)
451 print_ds(ds);
452
453 nfs_put_client(ds->ds_clp);
454
455 while (!list_empty(&ds->ds_addrs)) {
456 da = list_first_entry(&ds->ds_addrs,
457 struct nfs4_pnfs_ds_addr,
458 da_node);
459 list_del_init(&da->da_node);
460 kfree(da->da_remotestr);
461 kfree(da);
462 }
463
464 kfree(ds->ds_remotestr);
465 kfree(ds);
466}
467
468void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
469{
470 if (atomic_dec_and_lock(&ds->ds_count,
471 &nfs4_ds_cache_lock)) {
472 list_del_init(&ds->ds_node);
473 spin_unlock(&nfs4_ds_cache_lock);
474 destroy_ds(ds);
475 }
476}
477EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);
478
479/*
480 * Create a string with a human readable address and port to avoid
481 * complicated setup around many dprinks.
482 */
483static char *
484nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
485{
486 struct nfs4_pnfs_ds_addr *da;
487 char *remotestr;
488 size_t len;
489 char *p;
490
491 len = 3; /* '{', '}' and eol */
492 list_for_each_entry(da, dsaddrs, da_node) {
493 len += strlen(da->da_remotestr) + 1; /* string plus comma */
494 }
495
496 remotestr = kzalloc(len, gfp_flags);
497 if (!remotestr)
498 return NULL;
499
500 p = remotestr;
501 *(p++) = '{';
502 len--;
503 list_for_each_entry(da, dsaddrs, da_node) {
504 size_t ll = strlen(da->da_remotestr);
505
506 if (ll > len)
507 goto out_err;
508
509 memcpy(p, da->da_remotestr, ll);
510 p += ll;
511 len -= ll;
512
513 if (len < 1)
514 goto out_err;
515 (*p++) = ',';
516 len--;
517 }
518 if (len < 2)
519 goto out_err;
520 *(p++) = '}';
521 *p = '\0';
522 return remotestr;
523out_err:
524 kfree(remotestr);
525 return NULL;
526}
527
528/*
529 * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
530 * uncached and return cached struct nfs4_pnfs_ds.
531 */
532struct nfs4_pnfs_ds *
533nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
534{
535 struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
536 char *remotestr;
537
538 if (list_empty(dsaddrs)) {
539 dprintk("%s: no addresses defined\n", __func__);
540 goto out;
541 }
542
543 ds = kzalloc(sizeof(*ds), gfp_flags);
544 if (!ds)
545 goto out;
546
547 /* this is only used for debugging, so it's ok if its NULL */
548 remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
549
550 spin_lock(&nfs4_ds_cache_lock);
551 tmp_ds = _data_server_lookup_locked(dsaddrs);
552 if (tmp_ds == NULL) {
553 INIT_LIST_HEAD(&ds->ds_addrs);
554 list_splice_init(dsaddrs, &ds->ds_addrs);
555 ds->ds_remotestr = remotestr;
556 atomic_set(&ds->ds_count, 1);
557 INIT_LIST_HEAD(&ds->ds_node);
558 ds->ds_clp = NULL;
559 list_add(&ds->ds_node, &nfs4_data_server_cache);
560 dprintk("%s add new data server %s\n", __func__,
561 ds->ds_remotestr);
562 } else {
563 kfree(remotestr);
564 kfree(ds);
565 atomic_inc(&tmp_ds->ds_count);
566 dprintk("%s data server %s found, inc'ed ds_count to %d\n",
567 __func__, tmp_ds->ds_remotestr,
568 atomic_read(&tmp_ds->ds_count));
569 ds = tmp_ds;
570 }
571 spin_unlock(&nfs4_ds_cache_lock);
572out:
573 return ds;
574}
575EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
Peng Tao6b7f3cf2014-05-29 21:06:59 +0800576
Peng Tao7405f9e2014-05-29 21:06:58 +0800577static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
578{
579 might_sleep();
580 wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
581 TASK_KILLABLE);
582}
583
584static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
585{
586 smp_mb__before_atomic();
587 clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
588 smp_mb__after_atomic();
589 wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
590}
591
Peng Tao5f01d952014-05-30 18:15:59 +0800592static struct nfs_client *(*get_v3_ds_connect)(
593 struct nfs_client *mds_clp,
594 const struct sockaddr *ds_addr,
595 int ds_addrlen,
596 int ds_proto,
597 unsigned int ds_timeo,
598 unsigned int ds_retrans,
599 rpc_authflavor_t au_flavor);
600
601static bool load_v3_ds_connect(void)
602{
603 if (!get_v3_ds_connect) {
604 get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
605 WARN_ON_ONCE(!get_v3_ds_connect);
606 }
607
608 return(get_v3_ds_connect != NULL);
609}
610
Arnd Bergmanndf137bc2015-03-11 14:37:25 +0100611void nfs4_pnfs_v3_ds_connect_unload(void)
Peng Tao5f01d952014-05-30 18:15:59 +0800612{
613 if (get_v3_ds_connect) {
614 symbol_put(nfs3_set_ds_client);
615 get_v3_ds_connect = NULL;
616 }
617}
618EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload);
619
620static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
621 struct nfs4_pnfs_ds *ds,
622 unsigned int timeo,
623 unsigned int retrans,
624 rpc_authflavor_t au_flavor)
625{
626 struct nfs_client *clp = ERR_PTR(-EIO);
627 struct nfs4_pnfs_ds_addr *da;
628 int status = 0;
629
630 dprintk("--> %s DS %s au_flavor %d\n", __func__,
631 ds->ds_remotestr, au_flavor);
632
633 if (!load_v3_ds_connect())
634 goto out;
635
636 list_for_each_entry(da, &ds->ds_addrs, da_node) {
637 dprintk("%s: DS %s: trying address %s\n",
638 __func__, ds->ds_remotestr, da->da_remotestr);
639
640 clp = get_v3_ds_connect(mds_srv->nfs_client,
641 (struct sockaddr *)&da->da_addr,
642 da->da_addrlen, IPPROTO_TCP,
643 timeo, retrans, au_flavor);
644 if (!IS_ERR(clp))
645 break;
646 }
647
648 if (IS_ERR(clp)) {
649 status = PTR_ERR(clp);
650 goto out;
651 }
652
653 smp_wmb();
654 ds->ds_clp = clp;
655 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
656out:
657 return status;
658}
659
660static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
Peng Tao7405f9e2014-05-29 21:06:58 +0800661 struct nfs4_pnfs_ds *ds,
662 unsigned int timeo,
Peng Tao064172f2014-05-29 21:07:00 +0800663 unsigned int retrans,
Peng Tao30626f92014-05-30 18:15:58 +0800664 u32 minor_version,
Peng Tao064172f2014-05-29 21:07:00 +0800665 rpc_authflavor_t au_flavor)
Peng Tao7405f9e2014-05-29 21:06:58 +0800666{
667 struct nfs_client *clp = ERR_PTR(-EIO);
668 struct nfs4_pnfs_ds_addr *da;
669 int status = 0;
670
671 dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
Peng Tao5f01d952014-05-30 18:15:59 +0800672 au_flavor);
Peng Tao7405f9e2014-05-29 21:06:58 +0800673
674 list_for_each_entry(da, &ds->ds_addrs, da_node) {
675 dprintk("%s: DS %s: trying address %s\n",
676 __func__, ds->ds_remotestr, da->da_remotestr);
677
678 clp = nfs4_set_ds_client(mds_srv->nfs_client,
679 (struct sockaddr *)&da->da_addr,
680 da->da_addrlen, IPPROTO_TCP,
Peng Tao30626f92014-05-30 18:15:58 +0800681 timeo, retrans, minor_version,
682 au_flavor);
Peng Tao7405f9e2014-05-29 21:06:58 +0800683 if (!IS_ERR(clp))
684 break;
685 }
686
687 if (IS_ERR(clp)) {
688 status = PTR_ERR(clp);
689 goto out;
690 }
691
692 status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
693 if (status)
694 goto out_put;
695
696 smp_wmb();
697 ds->ds_clp = clp;
698 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
699out:
700 return status;
701out_put:
702 nfs_put_client(clp);
703 goto out;
704}
705
706/*
707 * Create an rpc connection to the nfs4_pnfs_ds data server.
708 * Currently only supports IPv4 and IPv6 addresses.
709 * If connection fails, make devid unavailable.
710 */
711void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
712 struct nfs4_deviceid_node *devid, unsigned int timeo,
Peng Tao30626f92014-05-30 18:15:58 +0800713 unsigned int retrans, u32 version,
714 u32 minor_version, rpc_authflavor_t au_flavor)
Peng Tao7405f9e2014-05-29 21:06:58 +0800715{
716 if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
717 int err = 0;
718
Peng Tao5f01d952014-05-30 18:15:59 +0800719 if (version == 3) {
720 err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
721 retrans, au_flavor);
722 } else if (version == 4) {
723 err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo,
724 retrans, minor_version,
725 au_flavor);
726 } else {
727 dprintk("%s: unsupported DS version %d\n", __func__,
728 version);
729 err = -EPROTONOSUPPORT;
730 }
731
Peng Tao7405f9e2014-05-29 21:06:58 +0800732 if (err)
733 nfs4_mark_deviceid_unavailable(devid);
734 nfs4_clear_ds_conn_bit(ds);
735 } else {
736 nfs4_wait_ds_connect(ds);
737 }
738}
739EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
740
Peng Tao6b7f3cf2014-05-29 21:06:59 +0800741/*
742 * Currently only supports ipv4, ipv6 and one multi-path address.
743 */
744struct nfs4_pnfs_ds_addr *
745nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
746{
747 struct nfs4_pnfs_ds_addr *da = NULL;
748 char *buf, *portstr;
749 __be16 port;
750 int nlen, rlen;
751 int tmp[2];
752 __be32 *p;
753 char *netid, *match_netid;
754 size_t len, match_netid_len;
755 char *startsep = "";
756 char *endsep = "";
757
758
759 /* r_netid */
760 p = xdr_inline_decode(xdr, 4);
761 if (unlikely(!p))
762 goto out_err;
763 nlen = be32_to_cpup(p++);
764
765 p = xdr_inline_decode(xdr, nlen);
766 if (unlikely(!p))
767 goto out_err;
768
769 netid = kmalloc(nlen+1, gfp_flags);
770 if (unlikely(!netid))
771 goto out_err;
772
773 netid[nlen] = '\0';
774 memcpy(netid, p, nlen);
775
776 /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
777 p = xdr_inline_decode(xdr, 4);
778 if (unlikely(!p))
779 goto out_free_netid;
780 rlen = be32_to_cpup(p);
781
782 p = xdr_inline_decode(xdr, rlen);
783 if (unlikely(!p))
784 goto out_free_netid;
785
786 /* port is ".ABC.DEF", 8 chars max */
787 if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
788 dprintk("%s: Invalid address, length %d\n", __func__,
789 rlen);
790 goto out_free_netid;
791 }
792 buf = kmalloc(rlen + 1, gfp_flags);
793 if (!buf) {
794 dprintk("%s: Not enough memory\n", __func__);
795 goto out_free_netid;
796 }
797 buf[rlen] = '\0';
798 memcpy(buf, p, rlen);
799
800 /* replace port '.' with '-' */
801 portstr = strrchr(buf, '.');
802 if (!portstr) {
803 dprintk("%s: Failed finding expected dot in port\n",
804 __func__);
805 goto out_free_buf;
806 }
807 *portstr = '-';
808
809 /* find '.' between address and port */
810 portstr = strrchr(buf, '.');
811 if (!portstr) {
812 dprintk("%s: Failed finding expected dot between address and "
813 "port\n", __func__);
814 goto out_free_buf;
815 }
816 *portstr = '\0';
817
818 da = kzalloc(sizeof(*da), gfp_flags);
819 if (unlikely(!da))
820 goto out_free_buf;
821
822 INIT_LIST_HEAD(&da->da_node);
823
824 if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
825 sizeof(da->da_addr))) {
826 dprintk("%s: error parsing address %s\n", __func__, buf);
827 goto out_free_da;
828 }
829
830 portstr++;
831 sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
832 port = htons((tmp[0] << 8) | (tmp[1]));
833
834 switch (da->da_addr.ss_family) {
835 case AF_INET:
836 ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
837 da->da_addrlen = sizeof(struct sockaddr_in);
838 match_netid = "tcp";
839 match_netid_len = 3;
840 break;
841
842 case AF_INET6:
843 ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
844 da->da_addrlen = sizeof(struct sockaddr_in6);
845 match_netid = "tcp6";
846 match_netid_len = 4;
847 startsep = "[";
848 endsep = "]";
849 break;
850
851 default:
852 dprintk("%s: unsupported address family: %u\n",
853 __func__, da->da_addr.ss_family);
854 goto out_free_da;
855 }
856
857 if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
858 dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
859 __func__, netid, match_netid);
860 goto out_free_da;
861 }
862
863 /* save human readable address */
864 len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
865 da->da_remotestr = kzalloc(len, gfp_flags);
866
867 /* NULL is ok, only used for dprintk */
868 if (da->da_remotestr)
869 snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
870 buf, endsep, ntohs(port));
871
872 dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
873 kfree(buf);
874 kfree(netid);
875 return da;
876
877out_free_da:
878 kfree(da);
879out_free_buf:
880 dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
881 kfree(buf);
882out_free_netid:
883 kfree(netid);
884out_err:
885 return NULL;
886}
887EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
Tom Haynes338d00c2015-02-17 14:58:15 -0800888
889void
890pnfs_layout_mark_request_commit(struct nfs_page *req,
891 struct pnfs_layout_segment *lseg,
892 struct nfs_commit_info *cinfo,
893 u32 ds_commit_idx)
894{
895 struct list_head *list;
896 struct pnfs_commit_bucket *buckets;
897
898 spin_lock(cinfo->lock);
899 buckets = cinfo->ds->buckets;
900 list = &buckets[ds_commit_idx].written;
901 if (list_empty(list)) {
902 /* Non-empty buckets hold a reference on the lseg. That ref
903 * is normally transferred to the COMMIT call and released
904 * there. It could also be released if the last req is pulled
905 * off due to a rewrite, in which case it will be done in
906 * pnfs_common_clear_request_commit
907 */
908 WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL);
909 buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg);
910 }
911 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
912 cinfo->ds->nwritten++;
913 spin_unlock(cinfo->lock);
914
915 nfs_request_add_commit_list(req, list, cinfo);
916}
917EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
Trond Myklebust5bb89b42015-03-25 14:14:42 -0400918
919int
920pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
921{
922 if (datasync)
923 return 0;
924 return pnfs_layoutcommit_inode(inode, true);
925}
926EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
927