aboutsummaryrefslogtreecommitdiff
path: root/include/rdma
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma')
-rw-r--r--include/rdma/ib_umem.h34
-rw-r--r--include/rdma/ib_umem_odp.h160
-rw-r--r--include/rdma/ib_verbs.h54
3 files changed, 243 insertions, 5 deletions
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a2bf41e0bde9..2d83cfd7e6ce 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -38,11 +38,12 @@
#include <linux/workqueue.h>
struct ib_ucontext;
+struct ib_umem_odp;
struct ib_umem {
struct ib_ucontext *context;
size_t length;
- int offset;
+ unsigned long address;
int page_size;
int writable;
int hugetlb;
@@ -50,17 +51,43 @@ struct ib_umem {
struct pid *pid;
struct mm_struct *mm;
unsigned long diff;
+ struct ib_umem_odp *odp_data;
struct sg_table sg_head;
int nmap;
int npages;
};
+/* Returns the offset of the umem start relative to the first page. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+ return umem->address & ((unsigned long)umem->page_size - 1);
+}
+
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem *umem)
+{
+ return umem->address - ib_umem_offset(umem);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem *umem)
+{
+ return PAGE_ALIGN(umem->address + umem->length);
+}
+
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+ return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+}
+
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+ size_t length);
#else /* CONFIG_INFINIBAND_USER_MEM */
@@ -73,7 +100,10 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
}
static inline void ib_umem_release(struct ib_umem *umem) { }
static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
-
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+ size_t length) {
+ return -EINVAL;
+}
#endif /* CONFIG_INFINIBAND_USER_MEM */
#endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
new file mode 100644
index 000000000000..3da0b167041b
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+struct umem_odp_node {
+ u64 __subtree_last;
+ struct rb_node rb;
+};
+
+struct ib_umem_odp {
+ /*
+ * An array of the pages included in the on-demand paging umem.
+ * Indices of pages that are currently not mapped into the device will
+ * contain NULL.
+ */
+ struct page **page_list;
+ /*
+ * An array of the same size as page_list, with DMA addresses mapped
+ * for pages the pages in page_list. The lower two bits designate
+ * access permissions. See ODP_READ_ALLOWED_BIT and
+ * ODP_WRITE_ALLOWED_BIT.
+ */
+ dma_addr_t *dma_list;
+ /*
+ * The umem_mutex protects the page_list and dma_list fields of an ODP
+ * umem, allowing only a single thread to map/unmap pages. The mutex
+ * also protects access to the mmu notifier counters.
+ */
+ struct mutex umem_mutex;
+ void *private; /* for the HW driver to use. */
+
+ /* When false, use the notifier counter in the ucontext struct. */
+ bool mn_counters_active;
+ int notifiers_seq;
+ int notifiers_count;
+
+ /* A linked list of umems that don't have private mmu notifier
+ * counters yet. */
+ struct list_head no_private_counters;
+ struct ib_umem *umem;
+
+ /* Tree tracking */
+ struct umem_odp_node interval_tree;
+
+ struct completion notifier_completion;
+ int dying;
+};
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+
+void ib_umem_odp_release(struct ib_umem *umem);
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
+ u64 access_mask, unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+ u64 bound);
+
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+ void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+ umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+ u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+ u64 start, u64 last);
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+ unsigned long mmu_seq)
+{
+ /*
+ * This code is strongly based on the KVM code from
+ * mmu_notifier_retry. Should be called with
+ * the relevant locks taken (item->odp_data->umem_mutex
+ * and the ucontext umem_mutex semaphore locked for read).
+ */
+
+ /* Do not allow page faults while the new ib_umem hasn't seen a state
+ * with zero notifiers yet, and doesn't have its own valid set of
+ * private counters. */
+ if (!item->odp_data->mn_counters_active)
+ return 1;
+
+ if (unlikely(item->odp_data->notifiers_count))
+ return 1;
+ if (item->odp_data->notifiers_seq != mmu_seq)
+ return 1;
+ return 0;
+}
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+static inline int ib_umem_odp_get(struct ib_ucontext *context,
+ struct ib_umem *umem)
+{
+ return -EINVAL;
+}
+
+static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011d6fa4..0d74f1de99aa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -51,6 +51,7 @@
#include <uapi/linux/if_ether.h>
#include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
extern struct workqueue_struct *ib_wq;
@@ -123,7 +124,8 @@ enum ib_device_cap_flags {
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1<<30)
+ IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
+ IB_DEVICE_ON_DEMAND_PAGING = (1<<31),
};
enum ib_signature_prot_cap {
@@ -143,6 +145,27 @@ enum ib_atomic_cap {
IB_ATOMIC_GLOB
};
+enum ib_odp_general_cap_bits {
+ IB_ODP_SUPPORT = 1 << 0,
+};
+
+enum ib_odp_transport_cap_bits {
+ IB_ODP_SUPPORT_SEND = 1 << 0,
+ IB_ODP_SUPPORT_RECV = 1 << 1,
+ IB_ODP_SUPPORT_WRITE = 1 << 2,
+ IB_ODP_SUPPORT_READ = 1 << 3,
+ IB_ODP_SUPPORT_ATOMIC = 1 << 4,
+};
+
+struct ib_odp_caps {
+ uint64_t general_caps;
+ struct {
+ uint32_t rc_odp_caps;
+ uint32_t uc_odp_caps;
+ uint32_t ud_odp_caps;
+ } per_transport_caps;
+};
+
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
@@ -186,6 +209,7 @@ struct ib_device_attr {
u8 local_ca_ack_delay;
int sig_prot_cap;
int sig_guard_cap;
+ struct ib_odp_caps odp_caps;
};
enum ib_mtu {
@@ -1073,7 +1097,8 @@ enum ib_access_flags {
IB_ACCESS_REMOTE_READ = (1<<2),
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
IB_ACCESS_MW_BIND = (1<<4),
- IB_ZERO_BASED = (1<<5)
+ IB_ZERO_BASED = (1<<5),
+ IB_ACCESS_ON_DEMAND = (1<<6),
};
struct ib_phys_buf {
@@ -1115,6 +1140,8 @@ struct ib_fmr_attr {
u8 page_shift;
};
+struct ib_umem;
+
struct ib_ucontext {
struct ib_device *device;
struct list_head pd_list;
@@ -1127,6 +1154,24 @@ struct ib_ucontext {
struct list_head xrcd_list;
struct list_head rule_list;
int closing;
+
+ struct pid *tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ struct rb_root umem_tree;
+ /*
+ * Protects .umem_rbroot and tree, as well as odp_mrs_count and
+ * mmu notifiers registration.
+ */
+ struct rw_semaphore umem_rwsem;
+ void (*invalidate_range)(struct ib_umem *umem,
+ unsigned long start, unsigned long end);
+
+ struct mmu_notifier mn;
+ atomic_t notifier_count;
+ /* A list of umems that don't have private mmu notifier counters yet. */
+ struct list_head no_private_counters;
+ int odp_mrs_count;
+#endif
};
struct ib_uobject {
@@ -1662,7 +1707,10 @@ static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t
static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
{
- return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+ size_t copy_sz;
+
+ copy_sz = min_t(size_t, len, udata->outlen);
+ return copy_to_user(udata->outbuf, src, copy_sz) ? -EFAULT : 0;
}
/**