Page MenuHomeFreeBSD

D31149.diff
No OneTemporary

D31149.diff

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4628,6 +4628,8 @@
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_cma.c optional ofed \
compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_core_uverbs.c optional ofed \
+ compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_cq.c optional ofed \
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_device.c optional ofed \
@@ -4648,6 +4650,8 @@
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_packer.c optional ofed \
compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_rdma_core.c optional ofed \
+ compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c optional ofed \
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_sa_query.c optional ofed \
@@ -4668,10 +4672,30 @@
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_uverbs_cmd.c optional ofed \
compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_ioctl.c optional ofed \
+ compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_uverbs_main.c optional ofed \
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_uverbs_marshall.c optional ofed \
compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c optional ofed \
+ compile-with "${OFED_C}"
+ofed/drivers/infiniband/core/ib_uverbs_uapi.c optional ofed \
+ compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_verbs.c optional ofed \
compile-with "${OFED_C}"
@@ -4827,6 +4851,8 @@
compile-with "${OFED_C}"
dev/mlx5/mlx5_ib/mlx5_ib_cq.c optional mlx5ib pci ofed \
compile-with "${OFED_C}"
+dev/mlx5/mlx5_ib/mlx5_ib_devx.c optional mlx5ib pci ofed \
+ compile-with "${OFED_C}"
dev/mlx5/mlx5_ib/mlx5_ib_doorbell.c optional mlx5ib pci ofed \
compile-with "${OFED_C}"
dev/mlx5/mlx5_ib/mlx5_ib_gsi.c optional mlx5ib pci ofed \
diff --git a/sys/dev/cxgbe/iw_cxgbe/cq.c b/sys/dev/cxgbe/iw_cxgbe/cq.c
--- a/sys/dev/cxgbe/iw_cxgbe/cq.c
+++ b/sys/dev/cxgbe/iw_cxgbe/cq.c
@@ -883,7 +883,7 @@
return !err || err == -ENODATA ? npolled : err;
}
-int c4iw_destroy_cq(struct ib_cq *ib_cq)
+void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
@@ -895,22 +895,20 @@
atomic_dec(&chp->refcnt);
wait_event(chp->wait, !atomic_read(&chp->refcnt));
- ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
- : NULL;
+ ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+ ibucontext);
destroy_cq(&chp->rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
- kfree(chp);
- return 0;
}
-struct ib_cq *
-c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_context, struct ib_udata *udata)
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct c4iw_dev *rhp;
- struct c4iw_cq *chp;
+ struct c4iw_cq *chp = to_c4iw_cq(ibcq);
struct c4iw_create_cq_resp uresp;
struct c4iw_ucontext *ucontext = NULL;
int ret;
@@ -919,17 +917,12 @@
CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
rhp = to_c4iw_dev(ibdev);
- chp = kzalloc(sizeof(*chp), GFP_KERNEL);
- if (!chp)
- return ERR_PTR(-ENOMEM);
-
-
- if (ib_context)
- ucontext = to_c4iw_ucontext(ib_context);
+ ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+ ibucontext);
/* account for the status page. */
entries++;
@@ -1020,7 +1013,7 @@
"%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
__func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
(unsigned long long) chp->cq.dma_addr);
- return &chp->ibcq;
+ return 0;
err5:
kfree(mm2);
err4:
@@ -1031,8 +1024,7 @@
destroy_cq(&chp->rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
err1:
- kfree(chp);
- return ERR_PTR(ret);
+ return ret;
}
int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
--- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
+++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
@@ -55,6 +55,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/iw_cm.h>
+#include <rdma/uverbs_ioctl.h>
#include "common/common.h"
#include "common/t4_msg.h"
@@ -474,6 +475,14 @@
u8 send_term;
};
+struct c4iw_ib_srq {
+ struct ib_srq ibsrq;
+};
+
+struct c4iw_ib_ah {
+ struct ib_ah ibah;
+};
+
struct c4iw_qp {
struct ib_qp ibqp;
struct c4iw_dev *rhp;
@@ -501,7 +510,6 @@
u32 key;
spinlock_t mmap_lock;
struct list_head mmaps;
- struct kref kref;
};
static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
@@ -509,17 +517,6 @@
return container_of(c, struct c4iw_ucontext, ibucontext);
}
-void _c4iw_free_ucontext(struct kref *kref);
-
-static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext)
-{
- kref_put(&ucontext->kref, _c4iw_free_ucontext);
-}
-static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext)
-{
- kref_get(&ucontext->kref);
-}
-
struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
@@ -938,7 +935,7 @@
void c4iw_qp_add_ref(struct ib_qp *qp);
void c4iw_qp_rem_ref(struct ib_qp *qp);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg);
+ u32 max_num_sg, struct ib_udata *udata);
int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
int c4iw_dealloc_mw(struct ib_mw *mw);
@@ -947,16 +944,15 @@
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64
virt, int acc, struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-int c4iw_dereg_mr(struct ib_mr *ib_mr);
+int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
-int c4iw_destroy_cq(struct ib_cq *ib_cq);
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_context,
- struct ib_udata *udata);
+void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int c4iw_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-int c4iw_destroy_qp(struct ib_qp *ib_qp);
+int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *attrs,
struct ib_udata *udata);
diff --git a/sys/dev/cxgbe/iw_cxgbe/mem.c b/sys/dev/cxgbe/iw_cxgbe/mem.c
--- a/sys/dev/cxgbe/iw_cxgbe/mem.c
+++ b/sys/dev/cxgbe/iw_cxgbe/mem.c
@@ -610,7 +610,7 @@
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
- u32 max_num_sg)
+ u32 max_num_sg, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@@ -700,7 +700,7 @@
}
-int c4iw_dereg_mr(struct ib_mr *ib_mr)
+int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_mr *mhp;
diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c
--- a/sys/dev/cxgbe/iw_cxgbe/provider.c
+++ b/sys/dev/cxgbe/iw_cxgbe/provider.c
@@ -58,16 +58,15 @@
return -ENOSYS;
}
-static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
+static int c4iw_ah_create(struct ib_ah *ah,
+ struct ib_ah_attr *ah_attr, u32 flags,
+ struct ib_udata *udata)
{
- return ERR_PTR(-ENOSYS);
+ return -ENOSYS;
}
-static int c4iw_ah_destroy(struct ib_ah *ah)
+static void c4iw_ah_destroy(struct ib_ah *ah, u32 flags)
{
- return -ENOSYS;
}
static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
@@ -93,35 +92,27 @@
return -ENOSYS;
}
-void _c4iw_free_ucontext(struct kref *kref)
+static void c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
- struct c4iw_ucontext *ucontext;
+ struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
struct c4iw_dev *rhp;
struct c4iw_mm_entry *mm, *tmp;
- ucontext = container_of(kref, struct c4iw_ucontext, kref);
+ pr_debug("context %p\n", context);
rhp = to_c4iw_dev(ucontext->ibucontext.device);
CTR2(KTR_IW_CXGBE, "%s ucontext %p", __func__, ucontext);
+
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
- kfree(ucontext);
}
-static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
+ struct ib_udata *udata)
{
- struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
-
- CTR2(KTR_IW_CXGBE, "%s context %p", __func__, context);
- c4iw_put_ucontext(ucontext);
- return 0;
-}
-
-static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
-{
- struct c4iw_ucontext *context;
+ struct ib_device *ibdev = ucontext->device;
+ struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
static int warned;
struct c4iw_alloc_ucontext_resp uresp;
@@ -129,16 +120,9 @@
struct c4iw_mm_entry *mm = NULL;
PDBG("%s ibdev %p\n", __func__, ibdev);
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context) {
- ret = -ENOMEM;
- goto err;
- }
-
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
- kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
if (!warned++)
@@ -150,7 +134,7 @@
mm = kmalloc(sizeof *mm, GFP_KERNEL);
if (!mm)
- goto err_free;
+ goto err;
uresp.status_page_size = PAGE_SIZE;
@@ -169,13 +153,11 @@
mm->len = PAGE_SIZE;
insert_mmap(context, mm);
}
- return &context->ibucontext;
+ return 0;
err_mm:
kfree(mm);
-err_free:
- kfree(context);
err:
- return ERR_PTR(ret);
+ return ret;
}
static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -226,8 +208,8 @@
return ret;
}
-static int
-c4iw_deallocate_pd(struct ib_pd *pd)
+static void
+c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct c4iw_pd *php = to_c4iw_pd(pd);
struct c4iw_dev *rhp = php->rhp;
@@ -238,36 +220,29 @@
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
- kfree(php);
-
- return (0);
}
-static struct ib_pd *
-c4iw_allocate_pd(struct ib_device *ibdev, struct ib_ucontext *context,
- struct ib_udata *udata)
+static int
+c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
- struct c4iw_pd *php;
+ struct c4iw_pd *php = to_c4iw_pd(pd);
+ struct ib_device *ibdev = pd->device;
u32 pdid;
struct c4iw_dev *rhp;
- CTR4(KTR_IW_CXGBE, "%s: ibdev %p, context %p, data %p", __func__, ibdev,
- context, udata);
+ CTR4(KTR_IW_CXGBE, "%s: ibdev %p, pd %p, data %p", __func__, ibdev,
+ pd, udata);
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
- return ERR_PTR(-EINVAL);
- php = kzalloc(sizeof(*php), GFP_KERNEL);
- if (!php) {
- c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
- return ERR_PTR(-ENOMEM);
- }
+ return -EINVAL;
+
php->pdid = pdid;
php->rhp = rhp;
- if (context) {
+ if (udata) {
if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) {
- c4iw_deallocate_pd(&php->ibpd);
- return ERR_PTR(-EFAULT);
+ c4iw_deallocate_pd(&php->ibpd, udata);
+ return -EFAULT;
}
}
mutex_lock(&rhp->rdev.stats.lock);
@@ -276,10 +251,10 @@
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
- CTR6(KTR_IW_CXGBE,
+ CTR5(KTR_IW_CXGBE,
"%s: ibdev %p, context %p, data %p, pddid 0x%x, pd %p", __func__,
- ibdev, context, udata, pdid, php);
- return (&php->ibpd);
+ ibdev, udata, pdid, php);
+ return (0);
}
static int
@@ -436,6 +411,13 @@
ret = linux_pci_attach_device(sc->dev, NULL, NULL, &dev->pdev);
if (ret)
return (ret);
+
+#define c4iw_ib_cq c4iw_cq
+#define c4iw_ib_pd c4iw_pd
+#define c4iw_ib_qp c4iw_qp
+#define c4iw_ib_ucontext c4iw_ucontext
+ INIT_IB_DEVICE_OPS(&ibdev->ops, c4iw, CXGB4);
+
strlcpy(ibdev->name, device_get_nameunit(sc->dev), sizeof(ibdev->name));
memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid));
memcpy(&ibdev->node_guid, sc->port[0]->vi[0].hw_addr, ETHER_ADDR_LEN);
diff --git a/sys/dev/cxgbe/iw_cxgbe/qp.c b/sys/dev/cxgbe/iw_cxgbe/qp.c
--- a/sys/dev/cxgbe/iw_cxgbe/qp.c
+++ b/sys/dev/cxgbe/iw_cxgbe/qp.c
@@ -583,8 +583,6 @@
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
- if (ucontext)
- c4iw_put_ucontext(ucontext);
kfree(qhp);
}
@@ -1678,7 +1676,7 @@
return ret;
}
-int c4iw_destroy_qp(struct ib_qp *ib_qp)
+int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
@@ -1879,7 +1877,6 @@
rq_db_key_mm->len);
insert_mmap(ucontext, rq_db_key_mm);
- c4iw_get_ucontext(ucontext);
qhp->ucontext = ucontext;
}
qhp->ibqp.qp_num = qhp->wq.sq.qid;
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib.h b/sys/dev/mlx4/mlx4_ib/mlx4_ib.h
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib.h
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib.h
@@ -83,16 +83,11 @@
HW_BAR_COUNT
};
-struct mlx4_ib_vma_private_data {
- struct vm_area_struct *vma;
-};
-
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
struct mutex db_page_mutex;
- struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
};
struct mlx4_ib_pd {
@@ -726,39 +721,37 @@
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
-int mlx4_ib_dereg_mr(struct ib_mr *mr);
+int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
-struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata);
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int mlx4_ib_destroy_cq(struct ib_cq *cq);
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata);
+int mlx4_ib_create_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr, u32 flags,
+ struct ib_udata *udata);
+int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct ib_ah_attr *ah_attr,
+ int slave_sgid_index, u8 *s_mac, u16 vlan_tag);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
-int mlx4_ib_destroy_ah(struct ib_ah *ah);
+void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags);
-struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-int mlx4_ib_destroy_srq(struct ib_srq *srq);
+void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
@@ -766,7 +759,7 @@
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
-int mlx4_ib_destroy_qp(struct ib_qp *qp);
+int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c
@@ -42,10 +42,11 @@
#include "mlx4_ib.h"
-static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct mlx4_ib_ah *ah)
+static int create_ib_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
{
- struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+ struct ib_pd *pd = ib_ah->pd;
+ struct mlx4_ib_ah *ah = to_mah(ib_ah);
+ struct mlx4_dev *dev = to_mdev(ib_ah->device)->dev;
ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.ib.g_slid = ah_attr->src_path_bits;
@@ -67,14 +68,14 @@
!(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
--ah->av.ib.stat_rate;
}
-
- return &ah->ibah;
+ return 0;
}
-static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct mlx4_ib_ah *ah)
+static int create_iboe_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
{
- struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+ struct ib_pd *pd = ib_ah->pd;
+ struct mlx4_ib_dev *ibdev = to_mdev(ib_ah->device);
+ struct mlx4_ib_ah *ah = to_mah(ib_ah);
struct mlx4_dev *dev = ibdev->dev;
int is_mcast = 0;
struct in6_addr in6;
@@ -93,7 +94,7 @@
ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
ah_attr->grh.sgid_index, &sgid, &gid_attr);
if (ret)
- return ERR_PTR(ret);
+ return ret;
eth_zero_addr(ah->av.eth.s_mac);
if (gid_attr.ndev) {
vlan_tag = rdma_vlan_dev_vlan_id(gid_attr.ndev);
@@ -105,7 +106,7 @@
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
if (ret < 0)
- return ERR_PTR(ret);
+ return ret;
ah->av.eth.gid_index = ret;
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
@@ -125,23 +126,15 @@
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
- return &ah->ibah;
+ return 0;
}
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
-
+int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr,
+ u32 flags, struct ib_udata *udata)
{
- struct mlx4_ib_ah *ah;
- struct ib_ah *ret;
-
- ah = kzalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
-
- if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
+ if (rdma_port_get_link_layer(ib_ah->pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
if (!(ah_attr->ah_flags & IB_AH_GRH)) {
- ret = ERR_PTR(-EINVAL);
+ return -EINVAL;
} else {
/*
* TBD: need to handle the case when we get
@@ -151,15 +144,35 @@
* local addresses which we can translate
* without going to sleep.
*/
- ret = create_iboe_ah(pd, ah_attr, ah);
+ return create_iboe_ah(ib_ah, ah_attr);
}
+ }
+ return create_ib_ah(ib_ah, ah_attr);
+}
- if (IS_ERR(ret))
- kfree(ah);
+int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct ib_ah_attr *ah_attr,
+ int slave_sgid_index, u8 *s_mac, u16 vlan_tag)
+{
+ struct ib_ah_attr slave_attr = *ah_attr;
+ struct mlx4_ib_ah *mah = to_mah(ah);
+ int ret;
+ slave_attr.grh.sgid_index = slave_sgid_index;
+ ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL);
+ if (ret)
return ret;
- } else
- return create_ib_ah(pd, ah_attr, ah); /* never fails */
+
+ /* get rid of force-loopback bit */
+ mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+
+ if (rdma_port_get_link_layer(ah->pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET)
+ memcpy(mah->av.eth.s_mac, s_mac, 6);
+
+ if (vlan_tag < 0x1000)
+ vlan_tag |= (ah_attr->sl & 7) << 13;
+ mah->av.eth.vlan = cpu_to_be16(vlan_tag);
+
+ return 0;
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
@@ -195,8 +208,7 @@
return 0;
}
-int mlx4_ib_destroy_ah(struct ib_ah *ah)
+void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
- kfree(to_mah(ah));
- return 0;
+ return;
}
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c
@@ -39,6 +39,7 @@
#include "mlx4_ib.h"
#include <rdma/mlx4-abi.h>
+#include <rdma/uverbs_ioctl.h>
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
@@ -135,14 +136,16 @@
mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
-static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
- struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
- u64 buf_addr, int cqe)
+static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
+ struct mlx4_ib_cq_buf *buf,
+ struct ib_umem **umem, u64 buf_addr, int cqe)
{
int err;
int cqe_size = dev->dev->caps.cqe_size;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
- *umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
+ *umem = ib_umem_get(&context->ibucontext, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -168,27 +171,25 @@
}
#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_cq *cq;
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
struct mlx4_uar *uar;
+ void *buf_addr;
int err;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
if (entries < 1 || entries > dev->dev->caps.max_cqes)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
- return ERR_PTR(-EINVAL);
-
- cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
cq->ibcq.cqe = entries - 1;
@@ -200,7 +201,7 @@
INIT_LIST_HEAD(&cq->send_qp_list);
INIT_LIST_HEAD(&cq->recv_qp_list);
- if (context) {
+ if (udata) {
struct mlx4_ib_create_cq ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -208,17 +209,17 @@
goto err_cq;
}
- err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
+ buf_addr = (void *)(unsigned long)ucmd.buf_addr;
+ err = mlx4_ib_get_cq_umem(dev, udata, &cq->buf, &cq->umem,
ucmd.buf_addr, entries);
if (err)
goto err_cq;
- err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
- &cq->db);
+ err = mlx4_ib_db_map_user(context, ucmd.db_addr, &cq->db);
if (err)
goto err_mtt;
- uar = &to_mucontext(context)->uar;
+ uar = &context->uar;
} else {
err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
if (err)
@@ -233,6 +234,8 @@
if (err)
goto err_db;
+ buf_addr = &cq->buf.buf;
+
uar = &dev->priv_uar;
}
@@ -248,37 +251,33 @@
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;
- if (context)
+ if (udata)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
err = -EFAULT;
goto err_cq_free;
}
- return &cq->ibcq;
+ return 0;
err_cq_free:
mlx4_cq_free(dev->dev, &cq->mcq);
err_dbmap:
- if (context)
- mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
+ if (udata)
+ mlx4_ib_db_unmap_user(context, &cq->db);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
- if (context)
- ib_umem_release(cq->umem);
- else
+ ib_umem_release(cq->umem);
+ if (!udata)
mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
- if (!context)
+ if (!udata)
mlx4_db_free(dev->dev, &cq->db);
-
err_cq:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
@@ -321,7 +320,7 @@
if (!cq->resize_buf)
return -ENOMEM;
- err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
+ err = mlx4_ib_get_cq_umem(dev, udata, &cq->resize_buf->buf,
&cq->resize_umem, ucmd.buf_addr, entries);
if (err) {
kfree(cq->resize_buf);
@@ -460,18 +459,15 @@
kfree(cq->resize_buf);
cq->resize_buf = NULL;
- if (cq->resize_umem) {
- ib_umem_release(cq->resize_umem);
- cq->resize_umem = NULL;
- }
-
+ ib_umem_release(cq->resize_umem);
+ cq->resize_umem = NULL;
out:
mutex_unlock(&cq->resize_mutex);
return err;
}
-int mlx4_ib_destroy_cq(struct ib_cq *cq)
+void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
struct mlx4_ib_cq *mcq = to_mcq(cq);
@@ -479,17 +475,18 @@
mlx4_cq_free(dev->dev, &mcq->mcq);
mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
- if (cq->uobject) {
- mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
- ib_umem_release(mcq->umem);
+ if (udata) {
+ mlx4_ib_db_unmap_user(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx4_ib_ucontext,
+ ibucontext),
+ &mcq->db);
} else {
mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
mlx4_db_free(dev->dev, &mcq->db);
}
-
- kfree(mcq);
-
- return 0;
+ ib_umem_release(mcq->umem);
}
static void dump_cqe(void *cqe)
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c
@@ -199,13 +199,13 @@
ah_attr.port_num = port_num;
new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ ib_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -540,7 +540,7 @@
memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
attr.ah_flags = IB_AH_GRH;
}
- ah = ib_create_ah(tun_ctx->pd, &attr);
+ ah = ib_create_ah(tun_ctx->pd, &attr, 0);
if (IS_ERR(ah))
return -ENOMEM;
@@ -557,7 +557,7 @@
tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
if (tun_qp->tx_ring[tun_tx_ix].ah)
- ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0);
tun_qp->tx_ring[tun_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
@@ -632,7 +632,7 @@
spin_unlock(&tun_qp->tx_lock);
tun_qp->tx_ring[tun_tx_ix].ah = NULL;
end:
- ib_destroy_ah(ah);
+ ib_destroy_ah(ah, 0);
return ret;
}
@@ -986,7 +986,7 @@
struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0])
- ib_destroy_ah(mad_send_wc->send_buf->context[0]);
+ ib_destroy_ah(mad_send_wc->send_buf->context[0], 0);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -1041,7 +1041,7 @@
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ ib_destroy_ah(dev->sm_ah[p], 0);
}
}
@@ -1334,11 +1334,9 @@
struct ib_ah *ah;
struct ib_qp *send_qp = NULL;
unsigned wire_tx_ix = 0;
- int ret = 0;
u16 wire_pkey_ix;
int src_qpnum;
- u8 sgid_index;
-
+ int ret;
sqp_ctx = dev->sriov.sqps[port-1];
@@ -1358,16 +1356,20 @@
send_qp = sqp->qp;
- /* create ah */
- sgid_index = attr->grh.sgid_index;
- attr->grh.sgid_index = 0;
- ah = ib_create_ah(sqp_ctx->pd, attr);
- if (IS_ERR(ah))
+ ah = rdma_zalloc_drv_obj(sqp_ctx->pd->device, ib_ah);
+ if (!ah)
return -ENOMEM;
- attr->grh.sgid_index = sgid_index;
- to_mah(ah)->av.ib.gid_index = sgid_index;
- /* get rid of force-loopback bit */
- to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+
+ ah->device = sqp_ctx->pd->device;
+ ah->pd = sqp_ctx->pd;
+
+ /* create ah */
+ ret = mlx4_ib_create_ah_slave(ah, attr,
+ attr->grh.sgid_index,
+ s_mac, vlan_id);
+ if (ret)
+ goto out;
+
spin_lock(&sqp->tx_lock);
if (sqp->tx_ix_head - sqp->tx_ix_tail >=
(MLX4_NUM_TUNNEL_BUFS - 1))
@@ -1379,8 +1381,7 @@
goto out;
sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
- if (sqp->tx_ring[wire_tx_ix].ah)
- ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ kfree(sqp->tx_ring[wire_tx_ix].ah);
sqp->tx_ring[wire_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
sqp->tx_ring[wire_tx_ix].buf.map,
@@ -1409,12 +1410,6 @@
wr.wr.num_sge = 1;
wr.wr.opcode = IB_WR_SEND;
wr.wr.send_flags = IB_SEND_SIGNALED;
- if (s_mac)
- memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
- if (vlan_id < 0x1000)
- vlan_id |= (attr->sl & 7) << 13;
- to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
-
ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
if (!ret)
@@ -1425,7 +1420,7 @@
spin_unlock(&sqp->tx_lock);
sqp->tx_ring[wire_tx_ix].ah = NULL;
out:
- ib_destroy_ah(ah);
+ kfree(ah);
return ret;
}
@@ -1684,7 +1679,7 @@
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
if (tun_qp->tx_ring[i].ah)
- ib_destroy_ah(tun_qp->tx_ring[i].ah);
+ ib_destroy_ah(tun_qp->tx_ring[i].ah, 0);
}
kfree(tun_qp->tx_ring);
kfree(tun_qp->ring);
@@ -1717,7 +1712,7 @@
"wrid=0x%llx, status=0x%x\n",
(unsigned long long)wc.wr_id, wc.status);
ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1734,7 +1729,7 @@
ctx->slave, wc.status, (unsigned long long)wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1872,8 +1867,8 @@
if (wc.status == IB_WC_SUCCESS) {
switch (wc.opcode) {
case IB_WC_SEND:
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ kfree(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
@@ -1902,8 +1897,8 @@
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, (unsigned long long)wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ kfree(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
@@ -1039,17 +1039,18 @@
return err;
}
-static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = uctx->device;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_ucontext *context = to_mucontext(uctx);
struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
if (!dev->ib_active)
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
@@ -1063,15 +1064,9 @@
resp.cqe_size = dev->dev->caps.cqe_size;
}
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
- if (err) {
- kfree(context);
- return ERR_PTR(err);
- }
+ if (err)
+ return err;
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1083,176 +1078,87 @@
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
- kfree(context);
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
- return &context->ibucontext;
+ return err;
}
-static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
- kfree(context);
-
- return 0;
-}
-
-static void mlx4_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA.
- * This is done through either mremap flow or split_vma (usually due
- * to mlock, madvise, munmap, etc.). We do not support a clone of the
- * vma, as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original vma size is exactly a single page that there will be no
- * "splitting" operations on.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx4_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
- * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting. The exiting case is handled explicitly as part
- * of mlx4_ib_disassociate_ucontext.
- */
- mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
- area->vm_private_data;
-
- /* set the vma context pointer to null in the mlx4_ib driver's private
- * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
- */
- mlx4_ib_vma_priv_data->vma = NULL;
-}
-
-static const struct vm_operations_struct mlx4_ib_vm_ops = {
- .open = mlx4_ib_vma_open,
- .close = mlx4_ib_vma_close
-};
-
-static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx4_ib_vma_private_data *vma_private_data)
-{
- vma_private_data->vma = vma;
- vma->vm_private_data = vma_private_data;
- vma->vm_ops = &mlx4_ib_vm_ops;
}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
- struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
+ switch (vma->vm_pgoff) {
+ case 0:
+ return rdma_user_mmap_io(context, vma,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
- if (vma->vm_pgoff == 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_DB].vma)
+ case 1:
+ if (dev->dev->caps.bf_reg_size == 0)
return -EINVAL;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
-
- } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_BF].vma)
- return -EINVAL;
-
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn +
- dev->dev->caps.num_uars,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
-
- } else if (vma->vm_pgoff == 3) {
+ return rdma_user_mmap_io(
+ context, vma,
+ to_mucontext(context)->uar.pfn +
+ dev->dev->caps.num_uars,
+ PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
+ NULL);
+
+ case 3: {
struct mlx4_clock_params params;
int ret;
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
- return -EINVAL;
-
ret = mlx4_get_internal_clock_params(dev->dev, &params);
-
if (ret)
return ret;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- (pci_resource_start(dev->dev->persist->pdev,
- params.bar) +
- params.offset)
- >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma,
- &mucontext->hw_bar_info[HW_BAR_CLOCK]);
- } else {
- return -EINVAL;
+ return rdma_user_mmap_io(
+ context, vma,
+ (pci_resource_start(dev->dev->persist->pdev,
+ params.bar) +
+ params.offset) >>
+ PAGE_SHIFT,
+ PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
+ NULL);
}
- return 0;
+ default:
+ return -EINVAL;
+ }
}
-static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
- struct mlx4_ib_pd *pd;
+ struct mlx4_ib_pd *pd = to_mpd(ibpd);
+ struct ib_device *ibdev = ibpd->device;
int err;
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
- if (err) {
- kfree(pd);
- return ERR_PTR(err);
- }
-
- if (context)
- if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
- mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
- kfree(pd);
- return ERR_PTR(-EFAULT);
- }
+ if (err)
+ return err;
- return &pd->ibpd;
+ if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+ mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
+ return -EFAULT;
+ }
+ return 0;
}
-static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
+static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
- kfree(pd);
-
- return 0;
}
static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_xrcd *xrcd;
@@ -1294,7 +1200,7 @@
return ERR_PTR(err);
}
-static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
@@ -1732,7 +1638,7 @@
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain)
+ int domain, struct ib_udata *udata)
{
int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
@@ -1747,6 +1653,10 @@
(flow_attr->type != IB_FLOW_ATTR_NORMAL))
return ERR_PTR(-EOPNOTSUPP);
+ if (udata &&
+ udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
memset(type, 0, sizeof(type));
mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
@@ -2550,6 +2460,7 @@
ibdev->dev = dev;
ibdev->bond_next_port = 0;
+ INIT_IB_DEVICE_OPS(&ibdev->ib_dev.ops, mlx4, MLX4);
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c
@@ -323,7 +323,7 @@
}
}
-int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
+int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
int ret;
@@ -383,9 +383,8 @@
return 0;
}
-struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
@@ -409,7 +408,6 @@
goto err_free_mr;
mr->max_pages = max_num_sg;
-
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_free_pl;
@@ -420,6 +418,7 @@
return &mr->ibmr;
err_free_pl:
+ mr->ibmr.device = pd->device;
mlx4_free_priv_pages(mr);
err_free_mr:
(void) mlx4_mr_free(dev->dev, &mr->mmr);
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c
@@ -42,6 +42,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
+#include <rdma/uverbs_ioctl.h>
#include <dev/mlx4/cmd.h>
#include <dev/mlx4/qp.h>
@@ -1022,7 +1023,7 @@
}
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
- int is_user)
+ struct ib_udata *udata)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
unsigned long flags;
@@ -1064,7 +1065,7 @@
list_del(&qp->qps_list);
list_del(&qp->cq_send_list);
list_del(&qp->cq_recv_list);
- if (!is_user) {
+ if (!udata) {
__mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
if (send_cq != recv_cq)
@@ -1087,11 +1088,16 @@
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
- if (is_user) {
- if (qp->rq.wqe_cnt)
- mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
- &qp->db);
- ib_umem_release(qp->umem);
+ if (udata) {
+ if (qp->rq.wqe_cnt) {
+ struct mlx4_ib_ucontext *mcontext =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx4_ib_ucontext,
+ ibucontext);
+
+ mlx4_ib_db_unmap_user(mcontext, &qp->db);
+ }
} else {
kvfree(qp->sq.wrid);
kvfree(qp->rq.wrid);
@@ -1102,6 +1108,7 @@
if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
}
+ ib_umem_release(qp->umem);
del_gid_entries(qp);
}
@@ -1273,7 +1280,7 @@
return ibqp;
}
-static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1292,7 +1299,7 @@
mlx4_ib_free_qp_counter(dev, mqp);
pd = get_pd(mqp);
- destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
+ destroy_qp_common(dev, mqp, udata);
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
@@ -1302,7 +1309,7 @@
return 0;
}
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1313,7 +1320,7 @@
ib_destroy_qp(sqp->roce_v2_gsi);
}
- return _mlx4_ib_destroy_qp(qp);
+ return _mlx4_ib_destroy_qp(qp, udata);
}
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
@@ -1618,12 +1625,16 @@
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state, enum ib_qp_state new_state)
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_ib_pd *pd;
struct mlx4_ib_cq *send_cq, *recv_cq;
+ struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
@@ -1699,10 +1710,9 @@
context->param3 |= cpu_to_be32(1 << 30);
}
- if (qp->ibqp.uobject)
+ if (ucontext)
context->usr_page = cpu_to_be32(
- mlx4_to_hw_uar_index(dev->dev,
- to_mucontext(ibqp->uobject->context)->uar.index));
+ mlx4_to_hw_uar_index(dev->dev, ucontext->uar.index));
else
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
@@ -2242,7 +2252,7 @@
goto out;
}
- err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state, udata);
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
attr->port_num = 1;
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c
@@ -37,6 +37,7 @@
#include "mlx4_ib.h"
#include <rdma/mlx4-abi.h>
+#include <rdma/uverbs_ioctl.h>
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
@@ -68,12 +69,14 @@
}
}
-struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx4_ib_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_srq *srq;
+ struct mlx4_ib_dev *dev = to_mdev(ib_srq->device);
+ struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
+ struct mlx4_ib_srq *srq = to_msrq(ib_srq);
struct mlx4_wqe_srq_next_seg *next;
struct mlx4_wqe_data_seg *scatter;
u32 cqn;
@@ -86,11 +89,7 @@
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
- return ERR_PTR(-EINVAL);
-
- srq = kmalloc(sizeof *srq, GFP_KERNEL);
- if (!srq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
@@ -105,20 +104,16 @@
buf_size = srq->msrq.max * desc_size;
- if (pd->uobject) {
+ if (udata) {
struct mlx4_ib_create_srq ucmd;
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
- err = -EFAULT;
- goto err_srq;
- }
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
- srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- buf_size, 0, 0);
- if (IS_ERR(srq->umem)) {
- err = PTR_ERR(srq->umem);
- goto err_srq;
- }
+ srq->umem =
+ ib_umem_get(&ucontext->ibucontext, ucmd.buf_addr, buf_size, 0, 0);
+ if (IS_ERR(srq->umem))
+ return PTR_ERR(srq->umem);
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
ilog2(srq->umem->page_size), &srq->mtt);
@@ -129,14 +124,14 @@
if (err)
goto err_mtt;
- err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
+ err = mlx4_ib_db_map_user(ucontext,
ucmd.db_addr, &srq->db);
if (err)
goto err_mtt;
} else {
err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
if (err)
- goto err_srq;
+ return err;
*srq->db.db = 0;
@@ -183,19 +178,19 @@
}
cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
- to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
+ to_mcq(init_attr->ext.cq)->mcq.cqn : 0;
xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
(u16) dev->dev->caps.reserved_xrcds;
- err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
- srq->db.dma, &srq->msrq);
+ err = mlx4_srq_alloc(dev->dev, to_mpd(ib_srq->pd)->pdn, cqn, xrcdn,
+ &srq->mtt, srq->db.dma, &srq->msrq);
if (err)
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
+ if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
@@ -203,11 +198,11 @@
init_attr->attr.max_wr = srq->msrq.max - 1;
- return &srq->ibsrq;
+ return 0;
err_wrid:
- if (pd->uobject)
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+ if (udata)
+ mlx4_ib_db_unmap_user(ucontext, &srq->db);
else
kvfree(srq->wrid);
@@ -215,19 +210,15 @@
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
- if (pd->uobject)
- ib_umem_release(srq->umem);
- else
+ if (!srq->umem)
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
+ ib_umem_release(srq->umem);
err_db:
- if (!pd->uobject)
+ if (!udata)
mlx4_db_free(dev->dev, &srq->db);
-err_srq:
- kfree(srq);
-
- return ERR_PTR(err);
+ return err;
}
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
@@ -274,7 +265,7 @@
return 0;
}
-int mlx4_ib_destroy_srq(struct ib_srq *srq)
+void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
@@ -282,19 +273,20 @@
mlx4_srq_free(dev->dev, &msrq->msrq);
mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
- if (srq->uobject) {
- mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
- ib_umem_release(msrq->umem);
+ if (udata) {
+ mlx4_ib_db_unmap_user(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx4_ib_ucontext,
+ ibucontext),
+ &msrq->db);
} else {
kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
}
-
- kfree(msrq);
-
- return 0;
+ ib_umem_release(msrq->umem);
}
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h
--- a/sys/dev/mlx5/device.h
+++ b/sys/dev/mlx5/device.h
@@ -299,6 +299,7 @@
MLX5_EVENT_QUEUE_TYPE_QP = 0,
MLX5_EVENT_QUEUE_TYPE_RQ = 1,
MLX5_EVENT_QUEUE_TYPE_SQ = 2,
+ MLX5_EVENT_QUEUE_TYPE_DCT = 6,
};
enum {
@@ -502,7 +503,9 @@
};
struct mlx5_eqe_qp_srq {
- __be32 reserved[6];
+ __be32 reserved1[5];
+ u8 type;
+ u8 reserved2[3];
__be32 qp_srq_n;
};
@@ -512,6 +515,12 @@
u8 syndrome;
};
+struct mlx5_eqe_xrq_err {
+ __be32 reserved1[5];
+ __be32 type_xrqn;
+ __be32 reserved2;
+};
+
struct mlx5_eqe_port_state {
u8 reserved0[8];
u8 port;
@@ -595,6 +604,11 @@
u32 rsvd0[6];
};
+struct mlx5_eqe_dct {
+ __be32 reserved[6];
+ __be32 dctn;
+};
+
struct mlx5_eqe_temp_warning {
__be64 sensor_warning_msb;
__be64 sensor_warning_lsb;
@@ -614,7 +628,9 @@
struct mlx5_eqe_port_module_event port_module_event;
struct mlx5_eqe_vport_change vport_change;
struct mlx5_eqe_general_notification_event general_notifications;
+ struct mlx5_eqe_dct dct;
struct mlx5_eqe_temp_warning temp_warning;
+ struct mlx5_eqe_xrq_err xrq_err;
} __packed;
struct mlx5_eqe {
@@ -957,6 +973,7 @@
MLX5_CAP_DMC,
MLX5_CAP_DEC,
MLX5_CAP_TLS,
+ MLX5_CAP_DEV_EVENT = 0x14,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1120,6 +1137,9 @@
#define MLX5_CAP_TLS(mdev, cap) \
MLX5_GET(tls_capabilities, (mdev)->hca_caps_cur[MLX5_CAP_TLS], cap)
+#define MLX5_CAP_DEV_EVENT(mdev, cap)\
+ MLX5_ADDR_OF(device_event_cap, (mdev)->hca_caps_cur[MLX5_CAP_DEV_EVENT], cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -399,7 +399,7 @@
enum {
MLX5_MKEY_MR = 1,
MLX5_MKEY_MW,
- MLX5_MKEY_MR_USER,
+ MLX5_MKEY_INDIRECT_DEVX,
};
struct mlx5_core_mkey {
@@ -410,20 +410,14 @@
u32 type;
};
-struct mlx5_core_mr {
- u64 iova;
- u64 size;
- u32 key;
- u32 pd;
-};
-
enum mlx5_res_type {
MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ,
MLX5_RES_SRQ = 3,
MLX5_RES_XSRQ = 4,
- MLX5_RES_DCT = 5,
+ MLX5_RES_XRQ = 5,
+ MLX5_RES_DCT = MLX5_EVENT_QUEUE_TYPE_DCT,
};
struct mlx5_core_rsc_common {
@@ -477,6 +471,7 @@
struct completion free;
};
+struct mlx5_ib_dev;
struct mlx5_eq_table {
void __iomem *update_ci;
void __iomem *update_arm_ci;
@@ -485,9 +480,10 @@
struct mlx5_eq async_eq;
struct mlx5_eq cmd_eq;
int num_comp_vectors;
- /* protect EQs list
- */
- spinlock_t lock;
+ spinlock_t lock; /* protect EQs list */
+ struct mlx5_ib_dev *dev; /* for devx event notifier */
+ bool (*cb)(struct mlx5_core_dev *mdev,
+ uint8_t event_type, void *data);
};
struct mlx5_core_health {
@@ -999,21 +995,21 @@
void mlx5_init_mr_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev);
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
- struct mlx5_core_mr *mkey,
+ struct mlx5_core_mkey *mkey,
struct mlx5_async_ctx *async_ctx, u32 *in,
int inlen, u32 *out, int outlen,
mlx5_async_cbk_t callback,
struct mlx5_async_work *context);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
- struct mlx5_core_mr *mr,
+ struct mlx5_core_mkey *mr,
u32 *in, int inlen);
-int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey);
-int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey,
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey);
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
u32 *out, int outlen);
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mr,
u32 *mkey);
-int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
-int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn, u16 uid);
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
void mlx5_fwp_flush(struct mlx5_fw_page *fwp);
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_eq.c b/sys/dev/mlx5/mlx5_core/mlx5_eq.c
--- a/sys/dev/mlx5/mlx5_core/mlx5_eq.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_eq.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -244,7 +244,11 @@
mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
eq->eqn, eqe_type_str(eqe->type));
- switch (eqe->type) {
+
+ if (dev->priv.eq_table.cb != NULL &&
+ dev->priv.eq_table.cb(dev, eqe->type, &eqe->data)) {
+ /* FALLTHROUGH */
+ } else switch (eqe->type) {
case MLX5_EVENT_TYPE_COMP:
mlx5_cq_completion(dev, eqe);
break;
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_fw.c b/sys/dev/mlx5/mlx5_core/mlx5_fw.c
--- a/sys/dev/mlx5/mlx5_core/mlx5_fw.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_fw.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -233,6 +233,12 @@
return err;
}
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT);
+ if (err)
+ return err;
+ }
+
err = mlx5_core_query_special_contexts(dev);
if (err)
return err;
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_mr.c b/sys/dev/mlx5/mlx5_core/mlx5_mr.c
--- a/sys/dev/mlx5/mlx5_core/mlx5_mr.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_mr.c
@@ -49,7 +49,7 @@
}
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
- struct mlx5_core_mr *mkey,
+ struct mlx5_core_mkey *mkey,
struct mlx5_async_ctx *async_ctx, u32 *in,
int inlen, u32 *out, int outlen,
mlx5_async_cbk_t callback,
@@ -111,7 +111,7 @@
EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
- struct mlx5_core_mr *mkey,
+ struct mlx5_core_mkey *mkey,
u32 *in, int inlen)
{
return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
@@ -119,12 +119,12 @@
}
EXPORT_SYMBOL(mlx5_core_create_mkey);
-int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey)
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey)
{
struct mlx5_mr_table *table = &dev->priv.mr_table;
u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
- struct mlx5_core_mr *deleted_mr;
+ struct mlx5_core_mkey *deleted_mr;
unsigned long flags;
spin_lock_irqsave(&table->lock, flags);
@@ -142,7 +142,7 @@
}
EXPORT_SYMBOL(mlx5_core_destroy_mkey);
-int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey,
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
u32 *out, int outlen)
{
u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0};
@@ -155,7 +155,7 @@
}
EXPORT_SYMBOL(mlx5_core_query_mkey);
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *_mkey,
+int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
u32 *mkey)
{
u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_pd.c b/sys/dev/mlx5/mlx5_core/mlx5_pd.c
--- a/sys/dev/mlx5/mlx5_core/mlx5_pd.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_pd.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -30,13 +30,14 @@
#include <dev/mlx5/driver.h>
#include "mlx5_core.h"
-int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0};
u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
int err;
MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ MLX5_SET(alloc_pd_in, in, uid, uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
@@ -47,12 +48,13 @@
}
EXPORT_SYMBOL(mlx5_core_alloc_pd);
-int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0};
u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, in, uid, uid);
MLX5_SET(dealloc_pd_in, in, pd, pdn);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_tls.c b/sys/dev/mlx5/mlx5_core/mlx5_tls.c
--- a/sys/dev/mlx5/mlx5_core/mlx5_tls.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_tls.c
@@ -116,5 +116,5 @@
void mlx5_tls_close_tis(struct mlx5_core_dev *mdev, u32 tisn)
{
- mlx5_core_destroy_tis(mdev, tisn);
+ mlx5_core_destroy_tis(mdev, tisn, 0);
}
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_transobj.c b/sys/dev/mlx5/mlx5_core/mlx5_transobj.c
--- a/sys/dev/mlx5/mlx5_core/mlx5_transobj.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_transobj.c
@@ -30,7 +30,7 @@
#include "mlx5_core.h"
#include "transobj.h"
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u32 uid)
{
u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
@@ -38,6 +38,7 @@
MLX5_SET(alloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(alloc_transport_domain_in, in, uid, uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (!err)
@@ -47,7 +48,7 @@
return err;
}
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, u32 uid)
{
u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
@@ -55,6 +56,7 @@
MLX5_SET(dealloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+ MLX5_SET(dealloc_transport_domain_in, in, uid, uid);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@@ -164,12 +166,13 @@
return err;
}
-void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u32 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0};
MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, in, uid, uid);
MLX5_SET(destroy_tir_in, in, tirn, tirn);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
@@ -201,12 +204,13 @@
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
-void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u32 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, uid, uid);
MLX5_SET(destroy_tis_in, in, tisn, tisn);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
@@ -374,12 +378,13 @@
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
-void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, uid, uid);
MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
diff --git a/sys/dev/mlx5/mlx5_core/transobj.h b/sys/dev/mlx5/mlx5_core/transobj.h
--- a/sys/dev/mlx5/mlx5_core/transobj.h
+++ b/sys/dev/mlx5/mlx5_core/transobj.h
@@ -28,8 +28,8 @@
#ifndef __TRANSOBJ_H__
#define __TRANSOBJ_H__
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
+int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u32 uid);
+void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, u32 uid);
int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqn);
int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 *in, int inlen);
@@ -42,12 +42,12 @@
int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn);
-void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u32 uid);
int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tisn);
int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
int inlen);
-void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u32 uid);
int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn);
int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
@@ -62,6 +62,6 @@
u32 *rqtn);
int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
int inlen);
-void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 uid);
#endif /* __TRANSOBJ_H__ */
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -1021,7 +1021,7 @@
struct sx state_lock; /* Protects Interface state */
u32 pdn;
u32 tdn;
- struct mlx5_core_mr mr;
+ struct mlx5_core_mkey mr;
u32 tisn[MLX5E_MAX_TX_NUM_TC];
u32 rqtn;
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -2636,7 +2636,7 @@
static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
- mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
+ mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc], 0);
}
static int
@@ -2965,7 +2965,7 @@
mlx5e_close_tir(struct mlx5e_priv *priv, int tt, bool inner_vxlan)
{
mlx5_core_destroy_tir(priv->mdev, inner_vxlan ?
- priv->tirn_inner_vxlan[tt] : priv->tirn[tt]);
+ priv->tirn_inner_vxlan[tt] : priv->tirn[tt], 0);
}
static int
@@ -3740,7 +3740,7 @@
static int
mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
- struct mlx5_core_mr *mkey)
+ struct mlx5_core_mkey *mkey)
{
struct ifnet *ifp = priv->ifp;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -4538,12 +4538,12 @@
/* reuse mlx5core's watchdog workqueue */
priv->wq = mdev->priv.health.wq_watchdog;
- err = mlx5_core_alloc_pd(mdev, &priv->pdn);
+ err = mlx5_core_alloc_pd(mdev, &priv->pdn, 0);
if (err) {
mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err);
goto err_free_wq;
}
- err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+ err = mlx5_alloc_transport_domain(mdev, &priv->tdn, 0);
if (err) {
mlx5_en_err(ifp,
"mlx5_alloc_transport_domain failed, %d\n", err);
@@ -4709,10 +4709,10 @@
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
err_dealloc_transport_domain:
- mlx5_dealloc_transport_domain(mdev, priv->tdn);
+ mlx5_dealloc_transport_domain(mdev, priv->tdn, 0);
err_dealloc_pd:
- mlx5_core_dealloc_pd(mdev, priv->pdn);
+ mlx5_core_dealloc_pd(mdev, priv->pdn, 0);
err_free_wq:
flush_workqueue(priv->wq);
@@ -4808,8 +4808,8 @@
sysctl_ctx_free(&priv->sysctl_ctx);
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
- mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
- mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
+ mlx5_dealloc_transport_domain(priv->mdev, priv->tdn, 0);
+ mlx5_core_dealloc_pd(priv->mdev, priv->pdn, 0);
mlx5e_disable_async_events(priv);
flush_workqueue(priv->wq);
mlx5e_priv_static_destroy(priv, mdev, mdev->priv.eq_table.num_comp_vectors);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -634,7 +634,7 @@
static void
mlx5e_rl_close_tis(struct mlx5e_priv *priv)
{
- mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn);
+ mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn, 0);
}
static void
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
@@ -33,6 +33,7 @@
#include <linux/printk.h>
#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_smi.h>
#include <dev/mlx5/cq.h>
#include <dev/mlx5/qp.h>
@@ -41,6 +42,7 @@
#include <dev/mlx5/mlx5_core/transobj.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/mlx5-abi.h>
+#include <rdma/uverbs_ioctl.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -66,15 +68,6 @@
MLX5_IB_MMAP_CMD_MASK = 0xff,
};
-enum mlx5_ib_mmap_cmd {
- MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
- MLX5_IB_MMAP_WC_PAGE = 2,
- MLX5_IB_MMAP_NC_PAGE = 3,
- /* 5 is chosen in order to be compatible with old versions of libmlx5 */
- MLX5_IB_MMAP_CORE_CLOCK = 5,
-};
-
enum {
MLX5_RES_SCAT_DATA32_CQE = 0x1,
MLX5_RES_SCAT_DATA64_CQE = 0x2,
@@ -109,9 +102,11 @@
MLX5_IB_INVALID_BFREG = BIT(31),
};
-struct mlx5_ib_vma_private_data {
- struct list_head list;
- struct vm_area_struct *vma;
+enum mlx5_ib_mmap_type {
+ MLX5_IB_MMAP_TYPE_MEMIC = 1,
+ MLX5_IB_MMAP_TYPE_VAR = 2,
+ MLX5_IB_MMAP_TYPE_UAR_WC = 3,
+ MLX5_IB_MMAP_TYPE_UAR_NC = 4,
};
struct mlx5_bfreg_info {
@@ -143,7 +138,9 @@
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
- struct list_head vma_private_list;
+
+ u64 lib_caps;
+ u16 devx_uid;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -154,6 +151,7 @@
struct mlx5_ib_pd {
struct ib_pd ibpd;
u32 pdn;
+ u16 uid;
};
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
@@ -279,6 +277,7 @@
struct mlx5_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_rwq_ind_tbl;
u32 rqtn;
+ u16 uid;
};
/*
@@ -358,12 +357,18 @@
spinlock_t lock32;
};
+struct mlx5_ib_dct {
+ struct mlx5_core_dct mdct;
+ u32 *in;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
struct mlx5_ib_qp_trans trans_qp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
struct mlx5_ib_rss_qp rss_qp;
+ struct mlx5_ib_dct dct;
};
struct mlx5_buf buf;
@@ -433,6 +438,7 @@
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
+ MLX5_IB_QP_UNDERLAY = 1 << 10,
};
struct mlx5_umr_wr {
@@ -517,6 +523,13 @@
MLX5_IB_MTT_WRITE = (1 << 1),
};
+struct mlx5_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u8 mmap_flag;
+ u64 address;
+ u32 page_idx;
+};
+
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
struct mlx5_ib_mr {
@@ -527,7 +540,7 @@
int max_descs;
int desc_size;
int access_mode;
- struct mlx5_core_mr mmkey;
+ struct mlx5_core_mkey mmkey;
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
struct list_head list;
@@ -545,7 +558,12 @@
struct mlx5_ib_mw {
struct ib_mw ibmw;
- struct mlx5_core_mr mmkey;
+ struct mlx5_core_mkey mmkey;
+};
+
+struct mlx5_ib_devx_mr {
+ struct mlx5_core_mkey mmkey;
+ int ndescs;
};
struct mlx5_ib_umr_context {
@@ -730,6 +748,12 @@
};
};
+struct mlx5_devx_event_table {
+ /* serialize updating the event_xa */
+ struct mutex event_xa_lock;
+ struct xarray event_xa;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -739,7 +763,8 @@
/* serialize update of capability mask
*/
struct mutex cap_mask_mutex;
- bool ib_active;
+ u8 ib_active:1;
+ u8 wc_support:1;
struct umr_common umrc;
/* sync used page count stats
*/
@@ -766,6 +791,7 @@
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg wc_bfreg;
struct mlx5_sq_bfreg fp_bfreg;
+ struct mlx5_devx_event_table devx_event_table;
struct mlx5_ib_congestion congestion;
struct mlx5_async_ctx async_ctx;
@@ -786,6 +812,14 @@
return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}
+static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata)
+{
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ return to_mdev(context->ibucontext.device);
+}
+
static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mlx5_ib_cq, ibcq);
@@ -801,7 +835,7 @@
return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
}
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmkey)
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
{
return container_of(mmkey, struct mlx5_ib_mr, mmkey);
}
@@ -856,6 +890,13 @@
return container_of(ibah, struct mlx5_ib_ah, ibah);
}
+static inline struct mlx5_user_mmap_entry *
+to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry,
+ struct mlx5_user_mmap_entry, rdma_entry);
+}
+
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
@@ -865,17 +906,16 @@
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+int mlx5_ib_create_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr, u32 flags,
struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
-int mlx5_ib_destroy_ah(struct ib_ah *ah);
-struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata);
+void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
-int mlx5_ib_destroy_srq(struct ib_srq *srq);
+void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
@@ -885,7 +925,7 @@
int attr_mask, struct ib_udata *udata);
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
-int mlx5_ib_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
@@ -894,11 +934,9 @@
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
void *buffer, u32 length,
struct mlx5_ib_qp_base *base);
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int mlx5_ib_destroy_cq(struct ib_cq *cq);
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
@@ -915,10 +953,9 @@
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata);
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -927,9 +964,8 @@
struct ib_mad_hdr *out, size_t *out_mad_size,
u16 *out_mad_pkey_index);
struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+ struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
@@ -971,7 +1007,7 @@
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-int mlx5_ib_destroy_wq(struct ib_wq *wq);
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
@@ -1043,6 +1079,28 @@
void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi,
int bfregn);
+
+#if 1 /* IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) */
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
+bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
+#else
+static inline int
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+ bool is_user) { return -EOPNOTSUPP; }
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {}
+static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
+ int *dest_type)
+{
+ return false;
+}
+#endif
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,7 +27,7 @@
#include "mlx5_ib.h"
-static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+static void create_ib_ah(struct mlx5_ib_dev *dev,
struct mlx5_ib_ah *ah,
struct ib_ah_attr *ah_attr,
enum rdma_link_layer ll)
@@ -55,22 +55,20 @@
ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
}
-
- return &ah->ibah;
}
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr,
+ u32 flags, struct ib_udata *udata)
{
- struct mlx5_ib_ah *ah;
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_ah *ah = to_mah(ibah);
+ struct mlx5_ib_dev *dev = to_mdev(ibah->device);
enum rdma_link_layer ll;
- ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+ ll = dev->ib_dev.get_link_layer(&dev->ib_dev, ah_attr->port_num);
if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (ll == IB_LINK_LAYER_ETHERNET && udata) {
int err;
@@ -79,25 +77,22 @@
sizeof(resp.dmac);
if (udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
resp.response_length = min_resp_len;
- err = ib_resolve_eth_dmac(pd->device, ah_attr);
+ err = ib_resolve_eth_dmac(&dev->ib_dev, ah_attr);
if (err)
- return ERR_PTR(err);
+ return err;
memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- return ERR_PTR(err);
+ return err;
}
- ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
-
- return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
+ create_ib_ah(dev, ah, ah_attr, ll);
+ return 0;
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
@@ -123,8 +118,7 @@
return 0;
}
-int mlx5_ib_destroy_ah(struct ib_ah *ah)
+void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
- kfree(to_mah(ah));
- return 0;
+ return;
}
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -29,6 +29,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
+#include <rdma/uverbs_ioctl.h>
#include "mlx5_ib.h"
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe __unused)
@@ -519,7 +520,7 @@
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
struct mlx5_sig_err_cqe *sig_err_cqe;
- struct mlx5_core_mr *mmkey;
+ struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
unsigned long flags;
uint8_t opcode;
@@ -741,11 +742,10 @@
}
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
- struct ib_ucontext *context, struct mlx5_ib_cq *cq,
- int entries, u32 **cqb,
+ struct mlx5_ib_cq *cq, int entries, u32 **cqb,
int *cqe_size, int *index, int *inlen)
{
- struct mlx5_ib_create_cq ucmd;
+ struct mlx5_ib_create_cq ucmd = {};
size_t ucmdlen;
int page_shift;
__be64 *pas;
@@ -753,6 +753,8 @@
int ncont;
void *cqc;
int err;
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
ucmdlen = min(udata->inlen, sizeof(ucmd));
if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
@@ -769,7 +771,7 @@
*cqe_size = ucmd.cqe_size;
- cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
+ cq->buf.umem = ib_umem_get(&context->ibucontext, ucmd.buf_addr,
entries * ucmd.cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(cq->buf.umem)) {
@@ -777,7 +779,7 @@
return err;
}
- err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+ err = mlx5_ib_db_map_user(context, ucmd.db_addr,
&cq->db);
if (err)
goto err_umem;
@@ -804,29 +806,33 @@
if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
*index = ucmd.uar_page_index;
- } else if (to_mucontext(context)->bfregi.lib_uar_dyn) {
+ } else if (context->bfregi.lib_uar_dyn) {
err = -EINVAL;
goto err_cqb;
} else {
- *index = to_mucontext(context)->bfregi.sys_pages[0];
+ *index = context->bfregi.sys_pages[0];
}
+ MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
return 0;
err_cqb:
kvfree(*cqb);
err_db:
- mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+ mlx5_ib_db_unmap_user(context, &cq->db);
err_umem:
ib_umem_release(cq->buf.umem);
return err;
}
-static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
+static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
{
- mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ mlx5_ib_db_unmap_user(context, &cq->db);
ib_umem_release(cq->buf.umem);
}
@@ -906,16 +912,15 @@
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
- struct mlx5_ib_cq *cq;
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
int uninitialized_var(index);
int uninitialized_var(inlen);
u32 *cqb = NULL;
@@ -927,18 +932,14 @@
if (entries < 0 ||
(entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (check_cq_create_flags(attr->flags))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
@@ -949,17 +950,17 @@
INIT_LIST_HEAD(&cq->list_send_qp);
INIT_LIST_HEAD(&cq->list_recv_qp);
- if (context) {
- err = create_cq_user(dev, udata, context, cq, entries,
- &cqb, &cqe_size, &index, &inlen);
+ if (udata) {
+ err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
+ &index, &inlen);
if (err)
- goto err_create;
+ return err;
} else {
cqe_size = cache_line_size() == 128 ? 128 : 64;
err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
@@ -990,7 +991,7 @@
INIT_LIST_HEAD(&cq->wc_list);
- if (context)
+ if (udata)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
err = -EFAULT;
goto err_cmd;
@@ -998,43 +999,30 @@
kvfree(cqb);
- return &cq->ibcq;
+ return 0;
err_cmd:
mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
err_cqb:
kvfree(cqb);
- if (context)
- destroy_cq_user(cq, context);
+ if (udata)
+ destroy_cq_user(cq, udata);
else
destroy_cq_kernel(dev, cq);
-
-err_create:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
-
-int mlx5_ib_destroy_cq(struct ib_cq *cq)
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
- struct ib_ucontext *context = NULL;
-
- if (cq->uobject)
- context = cq->uobject->context;
mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
- if (context)
- destroy_cq_user(mcq, context);
+ if (udata)
+ destroy_cq_user(mcq, udata);
else
destroy_cq_kernel(dev, mcq);
-
- kfree(mcq);
-
- return 0;
}
static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c
new file mode 100644
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c
@@ -0,0 +1,2930 @@
+/*-
+ * Copyright (c) 2018-2020, Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_std_types.h>
+#include <dev/mlx5/driver.h>
+#include <dev/mlx5/fs.h>
+#include "mlx5_ib.h"
+
+#include <sys/priv.h>
+
+#include <linux/xarray.h>
+#include <linux/rculist.h>
+#include <linux/srcu.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/wait.h>
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static void dispatch_event_fd(struct list_head *fd_list, const void *data);
+
+enum devx_obj_flags {
+ DEVX_OBJ_FLAGS_DCT = 1 << 1,
+ DEVX_OBJ_FLAGS_CQ = 1 << 2,
+};
+
+struct devx_async_data {
+ struct mlx5_ib_dev *mdev;
+ struct list_head list;
+ struct devx_async_cmd_event_file *ev_file;
+ struct mlx5_async_work cb_work;
+ u16 cmd_out_len;
+ /* must be last field in this structure */
+ struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
+};
+
+struct devx_async_event_data {
+ struct list_head list; /* headed in ev_file->event_list */
+ struct mlx5_ib_uapi_devx_async_event_hdr hdr;
+};
+
+/* first level XA value data structure */
+struct devx_event {
+ struct xarray object_ids; /* second XA level, Key = object id */
+ struct list_head unaffiliated_list;
+};
+
+/* second level XA value data structure */
+struct devx_obj_event {
+ struct rcu_head rcu;
+ struct list_head obj_sub_list;
+};
+
+struct devx_event_subscription {
+ struct list_head file_list; /* headed in ev_file->
+ * subscribed_events_list
+ */
+ struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
+ * devx_obj_event->obj_sub_list
+ */
+ struct list_head obj_list; /* headed in devx_object */
+ struct list_head event_list; /* headed in ev_file->event_list or in
+ * temp list via subscription
+ */
+
+ u8 is_cleaned:1;
+ u32 xa_key_level1;
+ u32 xa_key_level2;
+ struct rcu_head rcu;
+ u64 cookie;
+ struct devx_async_event_file *ev_file;
+ struct fd eventfd;
+};
+
+struct devx_async_event_file {
+ struct ib_uobject uobj;
+ /* Head of events that are subscribed to this FD */
+ struct list_head subscribed_events_list;
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ struct mlx5_ib_dev *dev;
+ u8 omit_data:1;
+ u8 is_overflow_err:1;
+ u8 is_destroyed:1;
+};
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+ struct mlx5_ib_dev *ib_dev;
+ u64 obj_id;
+ u32 dinlen; /* destroy inbox length */
+ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ union {
+ struct mlx5_ib_devx_mr devx_mr;
+ struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
+ u32 flow_counter_bulk_size;
+ };
+ struct list_head event_sub; /* holds devx_event_subscription entries */
+};
+
+struct devx_umem {
+ struct mlx5_core_dev *mdev;
+ struct ib_umem *umem;
+ u32 page_offset;
+ int page_shift;
+ int ncont;
+ u32 dinlen;
+ u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+};
+
+struct devx_umem_reg_cmd {
+ void *in;
+ u32 inlen;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+};
+
+static struct mlx5_ib_ucontext *
+devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
+{
+ return to_mucontext(ib_uverbs_get_ucontext(attrs));
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+{
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ void *uctx;
+ int err;
+ u16 uid;
+ u32 cap = 0;
+
+ /* 0 means not supported */
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
+ return -EINVAL;
+
+ uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+ if (is_user && priv_check(curthread, PRIV_NET_RAW) == 0 &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ cap |= MLX5_UCTX_CAP_RAW_TX;
+ if (is_user && priv_check(curthread, PRIV_DRIVER) == 0 &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES))
+ cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
+
+ MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
+ MLX5_SET(uctx, uctx, cap, cap);
+
+ err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return uid;
+}
+
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+
+ MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
+ MLX5_SET(destroy_uctx_in, in, uid, uid);
+
+ mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_DESTROY_TIR:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+ obj_id);
+ return true;
+
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+ table_id);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+
+ if (offset && offset >= devx_obj->flow_counter_bulk_size)
+ return false;
+
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ *counter_id += offset;
+ return true;
+ }
+
+ return false;
+}
+
+static bool is_legacy_unaffiliated_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_legacy_obj_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_COMP:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static u16 get_legacy_obj_type(u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_EVENT_QUEUE_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_EVENT_QUEUE_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_EVENT_QUEUE_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
+{
+ u16 opcode;
+
+ opcode = (obj->obj_id >> 32) & 0xffff;
+
+ if (is_legacy_obj_event_num(event_num))
+ return get_legacy_obj_type(opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
+ return (obj->obj_id >> 48);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_OBJ_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_OBJ_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_OBJ_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_OBJ_TYPE_DCT;
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_OBJ_TYPE_TIR;
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_OBJ_TYPE_TIS;
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_OBJ_TYPE_PSV;
+ case MLX5_OBJ_TYPE_MKEY:
+ return MLX5_OBJ_TYPE_MKEY;
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_OBJ_TYPE_RMP;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_OBJ_TYPE_XRC_SRQ;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_OBJ_TYPE_XRQ;
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_OBJ_TYPE_RQT;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_OBJ_TYPE_FLOW_COUNTER;
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_OBJ_TYPE_CQ;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
+{
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return eqe->data.qp_srq.type;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ return 0;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
+ }
+}
+
+static u32 get_dec_obj_id(u64 obj_id)
+{
+ return (obj_id & 0xffffffff);
+}
+
+/*
+ * As the obj_id in the firmware is not globally unique the object type
+ * must be considered upon checking for a valid object id.
+ * For that the opcode of the creator command is encoded as part of the obj_id.
+ */
+static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
+{
+ return ((u64)opcode << 32) | obj_id;
+}
+
+static u64 devx_get_obj_id(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ u64 obj_id;
+
+ switch (opcode) {
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJ:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJ |
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_type) << 16,
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_id));
+ break;
+ case MLX5_CMD_OP_QUERY_MKEY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
+ MLX5_GET(query_mkey_in, in,
+ mkey_index));
+ break;
+ case MLX5_CMD_OP_QUERY_CQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(query_cq_in, in, cqn));
+ break;
+ case MLX5_CMD_OP_MODIFY_CQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(modify_cq_in, in, cqn));
+ break;
+ case MLX5_CMD_OP_QUERY_SQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(query_sq_in, in, sqn));
+ break;
+ case MLX5_CMD_OP_MODIFY_SQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(modify_sq_in, in, sqn));
+ break;
+ case MLX5_CMD_OP_QUERY_RQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(query_rq_in, in, rqn));
+ break;
+ case MLX5_CMD_OP_MODIFY_RQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(modify_rq_in, in, rqn));
+ break;
+ case MLX5_CMD_OP_QUERY_RMP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(query_rmp_in, in, rmpn));
+ break;
+ case MLX5_CMD_OP_MODIFY_RMP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(modify_rmp_in, in, rmpn));
+ break;
+ case MLX5_CMD_OP_QUERY_RQT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(query_rqt_in, in, rqtn));
+ break;
+ case MLX5_CMD_OP_MODIFY_RQT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(modify_rqt_in, in, rqtn));
+ break;
+ case MLX5_CMD_OP_QUERY_TIR:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(query_tir_in, in, tirn));
+ break;
+ case MLX5_CMD_OP_MODIFY_TIR:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(modify_tir_in, in, tirn));
+ break;
+ case MLX5_CMD_OP_QUERY_TIS:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(query_tis_in, in, tisn));
+ break;
+ case MLX5_CMD_OP_MODIFY_TIS:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(modify_tis_in, in, tisn));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(query_flow_table_in, in,
+ table_id));
+ break;
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(modify_flow_table_in, in,
+ table_id));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
+ MLX5_GET(query_flow_group_in, in,
+ group_id));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(query_fte_in, in,
+ flow_index));
+ break;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(set_fte_in, in, flow_index));
+ break;
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
+ MLX5_GET(query_q_counter_in, in,
+ counter_set_id));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
+ MLX5_GET(query_flow_counter_in, in,
+ flow_counter_id));
+ break;
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_id));
+ break;
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(query_scheduling_element_in,
+ in, scheduling_element_id));
+ break;
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(modify_scheduling_element_in,
+ in, scheduling_element_id));
+ break;
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
+ MLX5_GET(add_vxlan_udp_dport_in, in,
+ vxlan_udp_port));
+ break;
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(query_l2_table_entry_in, in,
+ table_index));
+ break;
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(set_l2_table_entry_in, in,
+ table_index));
+ break;
+ case MLX5_CMD_OP_QUERY_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(query_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_RST2INIT_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rst2init_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(init2rtr_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rtr2rts_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rts2rts_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(sqerr2rts_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2err_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2rst_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_QUERY_DCT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(query_dct_in, in, dctn));
+ break;
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(query_xrq_in, in, xrqn));
+ break;
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(query_xrc_srq_in, in,
+ xrc_srqn));
+ break;
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
+ break;
+ case MLX5_CMD_OP_QUERY_SRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
+ MLX5_GET(query_srq_in, in, srqn));
+ break;
+ case MLX5_CMD_OP_ARM_RQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(arm_rq_in, in, srq_number));
+ break;
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(drain_dct_in, in, dctn));
+ break;
+ case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(arm_xrq_in, in, xrqn));
+ break;
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ obj_id = get_enc_obj_id
+ (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
+ MLX5_GET(query_packet_reformat_context_in,
+ in, packet_reformat_id));
+ break;
+ default:
+ obj_id = 0;
+ }
+
+ return obj_id;
+}
+
+static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
+ struct ib_uobject *uobj, const void *in)
+{
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ u64 obj_id = devx_get_obj_id(in);
+
+ if (!obj_id)
+ return false;
+
+ switch (uobj_get_object_id(uobj)) {
+ case UVERBS_OBJECT_CQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ to_mcq(uobj->object)->mcq.cqn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_SRQ:
+ {
+ struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
+ u16 opcode;
+
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
+ break;
+ case MLX5_RES_XRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRQ;
+ break;
+ default:
+ if (!dev->mdev->issi)
+ opcode = MLX5_CMD_OP_CREATE_SRQ;
+ else
+ opcode = MLX5_CMD_OP_CREATE_RMP;
+ }
+
+ return get_enc_obj_id(opcode,
+ to_msrq(uobj->object)->msrq.srqn) ==
+ obj_id;
+ }
+
+ case UVERBS_OBJECT_QP:
+ {
+ struct mlx5_ib_qp *qp = to_mqp(uobj->object);
+ enum ib_qp_type qp_type = qp->ibqp.qp_type;
+
+ if (qp_type == IB_QPT_RAW_PACKET ||
+ (qp->flags & MLX5_IB_QP_UNDERLAY)) {
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp =
+ &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+
+ return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ rq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ sq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ rq->tirn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ sq->tisn) == obj_id);
+ }
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ qp->dct.mdct.dctn) == obj_id;
+
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ qp->ibqp.qp_num) == obj_id;
+ }
+
+ case UVERBS_OBJECT_WQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ to_mrwq(uobj->object)->core_qp.qpn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_RWQ_IND_TBL:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ to_mrwq_ind_table(uobj->object)->rqtn) ==
+ obj_id;
+
+ case MLX5_IB_OBJECT_DEVX_OBJ:
+ return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
+
+ default:
+ return false;
+ }
+}
+
+static void devx_set_umem_valid(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_MKEY:
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ break;
+ case MLX5_CMD_OP_CREATE_CQ:
+ {
+ void *cqc;
+
+ MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
+ break;
+ }
+ case MLX5_CMD_OP_CREATE_QP:
+ {
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
+ MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_RQ:
+ {
+ void *rqc, *wq;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_SQ:
+ {
+ void *sqc, *wq;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_MODIFY_CQ:
+ MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
+ break;
+
+ case MLX5_CMD_OP_CREATE_RMP:
+ {
+ void *rmpc, *wq;
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRQ:
+ {
+ void *xrqc, *wq;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ {
+ void *xrc_srqc;
+
+ MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
+{
+ *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (*opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
+ case MLX5_CMD_OP_CREATE_MKEY:
+ case MLX5_CMD_OP_CREATE_CQ:
+ case MLX5_CMD_OP_ALLOC_PD:
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_CREATE_RMP:
+ case MLX5_CMD_OP_CREATE_SQ:
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_CREATE_RQT:
+ case MLX5_CMD_OP_CREATE_TIR:
+ case MLX5_CMD_OP_CREATE_TIS:
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_CREATE_QP:
+ case MLX5_CMD_OP_CREATE_SRQ:
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ case MLX5_CMD_OP_CREATE_DCT:
+ case MLX5_CMD_OP_CREATE_XRQ:
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ return true;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ {
+ u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+ if (op_mod == 0)
+ return true;
+ return false;
+ }
+ case MLX5_CMD_OP_CREATE_PSV:
+ {
+ u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
+
+ if (num_psv == 1)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_obj_modify_cmd(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJ:
+ case MLX5_CMD_OP_MODIFY_CQ:
+ case MLX5_CMD_OP_MODIFY_RMP:
+ case MLX5_CMD_OP_MODIFY_SQ:
+ case MLX5_CMD_OP_MODIFY_RQ:
+ case MLX5_CMD_OP_MODIFY_RQT:
+ case MLX5_CMD_OP_MODIFY_TIR:
+ case MLX5_CMD_OP_MODIFY_TIS:
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_RST2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ case MLX5_CMD_OP_2ERR_QP:
+ case MLX5_CMD_OP_2RST_QP:
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ case MLX5_CMD_OP_ARM_RQ:
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
+ return true;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ {
+ u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+
+ if (op_mod == 1)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_obj_query_cmd(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJ:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_QUERY_RQT:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_whitelist_cmd(void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
+{
+ if (devx_is_whitelist_cmd(cmd_in)) {
+ if (c->devx_uid)
+ return c->devx_uid;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ return c->devx_uid;
+}
+
+static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
+ if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
+ MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
+ (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END))
+ return true;
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_QUERY_LAG:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ int user_vector;
+ int dev_eqn;
+ unsigned int irqn;
+ int err;
+
+ if (uverbs_copy_from(&user_vector, attrs,
+ MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
+ return -EFAULT;
+
+ c = devx_ufile2uctx(attrs);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
+ if (err < 0)
+ return err;
+
+ if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+ &dev_eqn, sizeof(dev_eqn)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ *Security note:
+ * The hardware protection mechanism works like this: Each device object that
+ * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
+ * the device specification manual) upon its creation. Then upon doorbell,
+ * hardware fetches the object context for which the doorbell was rang, and
+ * validates that the UAR through which the DB was rang matches the UAR ID
+ * of the object.
+ * If no match the doorbell is silently ignored by the hardware. Of course,
+ * the user cannot ring a doorbell on a UAR that was not mapped to it.
+ * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
+ * mailboxes (except tagging them with UID), we expose to the user its UAR
+ * ID, so it can embed it in these objects in the expected specification
+ * format. So the only thing the user can do is hurt itself by creating a
+ * QP/SQ/CQ with a UAR ID other than his, and then in this case other users
+ * may ring a doorbell on its objects.
+ * The consequence of that will be that another user can schedule a QP/SQ
+ * of the buggy user for execution (just insert it to the hardware schedule
+ * queue or arm its CQ for event generation), no further harm is expected.
+ */
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ u32 user_idx;
+ s32 dev_idx;
+
+ c = devx_ufile2uctx(attrs);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ if (uverbs_copy_from(&user_idx, attrs,
+ MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
+ return -EFAULT;
+
+ dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
+ if (dev_idx < 0)
+ return dev_idx;
+
+ if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+ &dev_idx, sizeof(dev_idx)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ void *cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
+ void *cmd_out;
+ int err;
+ int uid;
+
+ c = devx_ufile2uctx(attrs);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ /* Only white list of some general HCA commands are allowed for this method. */
+ if (!devx_is_general_cmd(cmd_in, dev))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
+ cmd_out_len);
+}
+
+static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
+ u32 *dinlen,
+ u32 *obj_id)
+{
+ u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
+ u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
+
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
+
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
+
+ switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJ);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
+ break;
+
+ case MLX5_CMD_OP_CREATE_UMEM:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_UMEM);
+ break;
+ case MLX5_CMD_OP_CREATE_MKEY:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ break;
+ case MLX5_CMD_OP_CREATE_CQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ break;
+ case MLX5_CMD_OP_ALLOC_PD:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ break;
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ break;
+ case MLX5_CMD_OP_CREATE_RMP:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ break;
+ case MLX5_CMD_OP_CREATE_SQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ break;
+ case MLX5_CMD_OP_CREATE_RQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ break;
+ case MLX5_CMD_OP_CREATE_RQT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ break;
+ case MLX5_CMD_OP_CREATE_TIR:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ break;
+ case MLX5_CMD_OP_CREATE_TIS:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ break;
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ break;
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
+ *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
+ MLX5_SET(destroy_flow_table_in, din, other_vport,
+ MLX5_GET(create_flow_table_in, in, other_vport));
+ MLX5_SET(destroy_flow_table_in, din, vport_number,
+ MLX5_GET(create_flow_table_in, in, vport_number));
+ MLX5_SET(destroy_flow_table_in, din, table_type,
+ MLX5_GET(create_flow_table_in, in, table_type));
+ MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ break;
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
+ *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
+ MLX5_SET(destroy_flow_group_in, din, other_vport,
+ MLX5_GET(create_flow_group_in, in, other_vport));
+ MLX5_SET(destroy_flow_group_in, din, vport_number,
+ MLX5_GET(create_flow_group_in, in, vport_number));
+ MLX5_SET(destroy_flow_group_in, din, table_type,
+ MLX5_GET(create_flow_group_in, in, table_type));
+ MLX5_SET(destroy_flow_group_in, din, table_id,
+ MLX5_GET(create_flow_group_in, in, table_id));
+ MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ break;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
+ *obj_id = MLX5_GET(set_fte_in, in, flow_index);
+ MLX5_SET(delete_fte_in, din, other_vport,
+ MLX5_GET(set_fte_in, in, other_vport));
+ MLX5_SET(delete_fte_in, din, vport_number,
+ MLX5_GET(set_fte_in, in, vport_number));
+ MLX5_SET(delete_fte_in, din, table_type,
+ MLX5_GET(set_fte_in, in, table_type));
+ MLX5_SET(delete_fte_in, din, table_id,
+ MLX5_GET(set_fte_in, in, table_id));
+ MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ break;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ break;
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ break;
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ break;
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
+ *obj_id = MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ MLX5_SET(destroy_scheduling_element_in, din,
+ scheduling_hierarchy,
+ MLX5_GET(create_scheduling_element_in, in,
+ scheduling_hierarchy));
+ MLX5_SET(destroy_scheduling_element_in, din,
+ scheduling_element_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+ break;
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
+ *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+ break;
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
+ *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+ MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ break;
+ case MLX5_CMD_OP_CREATE_QP:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ break;
+ case MLX5_CMD_OP_CREATE_SRQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ break;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ break;
+ case MLX5_CMD_OP_CREATE_DCT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ break;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ break;
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
+ MLX5_SET(detach_from_mcg_in, din, qpn,
+ MLX5_GET(attach_to_mcg_in, in, qpn));
+ memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
+ MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
+ MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ break;
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ break;
+ case MLX5_CMD_OP_CREATE_PSV:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, din, psvn,
+ MLX5_GET(create_psv_out, out, psv0_index));
+ break;
+ default:
+ /* The entry must match to one of the devx_is_obj_create_cmd */
+ WARN_ON(true);
+ break;
+ }
+}
+
+static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
+ struct devx_obj *obj,
+ void *in, int in_len)
+{
+ int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
+ MLX5_FLD_SZ_BYTES(create_mkey_in,
+ memory_key_mkey_entry);
+ void *mkc;
+ u8 access_mode;
+
+ if (in_len < min_len)
+ return -EINVAL;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ access_mode = MLX5_GET(mkc, mkc, access_mode);
+ access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
+
+ if (access_mode == MLX5_ACCESS_MODE_KLM ||
+ access_mode == MLX5_ACCESS_MODE_KSM) {
+ return 0;
+ }
+
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ return 0;
+}
+
+static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
+ struct devx_event_subscription *sub)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ if (sub->is_cleaned)
+ return;
+
+ sub->is_cleaned = 1;
+ list_del_rcu(&sub->xa_list);
+
+ if (list_empty(&sub->obj_list))
+ return;
+
+ list_del_rcu(&sub->obj_list);
+ /* check whether key level 1 for this obj_sub_list is empty */
+ event = xa_load(&dev->devx_event_table.event_xa,
+ sub->xa_key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ sub->xa_key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
+static int devx_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_devx_event_table *devx_event_table;
+ struct devx_obj *obj = uobject->object;
+ struct devx_event_subscription *sub_entry, *tmp;
+ struct mlx5_ib_dev *dev;
+ int ret;
+
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+ ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
+ else
+ ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
+ obj->dinlen, out, sizeof(out));
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ devx_event_table = &dev->devx_event_table;
+
+ mutex_lock(&devx_event_table->event_xa_lock);
+ list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
+ devx_cleanup_subscription(dev, sub_entry);
+ mutex_unlock(&devx_event_table->event_xa_lock);
+
+ kfree(obj);
+ return ret;
+}
+
+static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
+{
+ struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
+ struct mlx5_devx_event_table *table;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u32 obj_id = mcq->cqn;
+
+ table = &obj->ib_dev->devx_event_table;
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
+ if (!event)
+ goto out;
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event)
+ goto out;
+
+ dispatch_event_fd(&obj_event->obj_sub_list, eqe);
+out:
+ rcu_read_unlock();
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
+ int cmd_in_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
+ void *cmd_out;
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct devx_obj *obj;
+ u16 obj_type = 0;
+ int err;
+ int uid;
+ u32 obj_id;
+ u16 opcode;
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_create_cmd(cmd_in, &opcode))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
+ err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
+ if (err)
+ goto obj_free;
+ } else {
+ devx_set_umem_valid(cmd_in);
+ }
+
+ if (opcode == MLX5_CMD_OP_CREATE_DCT) {
+ obj->flags |= DEVX_OBJ_FLAGS_DCT;
+ err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
+ cmd_in, cmd_in_len,
+ cmd_out, cmd_out_len);
+ } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
+ obj->flags |= DEVX_OBJ_FLAGS_CQ;
+ obj->core_cq.comp = devx_cq_comp;
+ err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
+ cmd_in, cmd_in_len, cmd_out,
+ cmd_out_len);
+ } else {
+ err = mlx5_cmd_exec(dev->mdev, cmd_in,
+ cmd_in_len,
+ cmd_out, cmd_out_len);
+ }
+
+ if (err)
+ goto obj_free;
+
+ if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
+ u8 bulk = MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk);
+ obj->flow_counter_bulk_size = 128UL * bulk;
+ }
+
+ uobj->object = obj;
+ INIT_LIST_HEAD(&obj->event_sub);
+ obj->ib_dev = dev;
+ devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
+ &obj_id);
+ WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
+ if (err)
+ goto obj_destroy;
+
+ if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJ)
+ obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
+ obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
+
+ return 0;
+
+obj_destroy:
+ if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+ mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
+ else
+ mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
+ sizeof(out));
+obj_free:
+ kfree(obj);
+ return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+ void *cmd_out;
+ int err;
+ int uid;
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_modify_cmd(cmd_in))
+ return -EINVAL;
+
+ if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ devx_set_umem_valid(cmd_in);
+
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ cmd_out, cmd_out_len);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ void *cmd_out;
+ int err;
+ int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_query_cmd(cmd_in))
+ return -EINVAL;
+
+ if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+ cmd_out, cmd_out_len);
+}
+
+struct devx_async_event_queue {
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ atomic_t bytes_in_use;
+ u8 is_destroyed:1;
+};
+
+struct devx_async_cmd_event_file {
+ struct ib_uobject uobj;
+ struct devx_async_event_queue ev_queue;
+ struct mlx5_async_ctx async_ctx;
+};
+
+static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
+{
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ atomic_set(&ev_queue->bytes_in_use, 0);
+ ev_queue->is_destroyed = 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct devx_async_cmd_event_file *ev_file;
+
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+ ev_file = container_of(uobj, struct devx_async_cmd_event_file,
+ uobj);
+ devx_init_event_queue(&ev_file->ev_queue);
+ mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
+ struct devx_async_event_file *ev_file;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 flags;
+ int err;
+
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
+
+ if (err)
+ return err;
+
+ ev_file = container_of(uobj, struct devx_async_event_file,
+ uobj);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
+ ev_file->omit_data = 1;
+ INIT_LIST_HEAD(&ev_file->subscribed_events_list);
+ ev_file->dev = dev;
+ get_device(&dev->ib_dev.dev);
+ return 0;
+}
+
+static void devx_query_callback(int status, struct mlx5_async_work *context)
+{
+ struct devx_async_data *async_data =
+ container_of(context, struct devx_async_data, cb_work);
+ struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
+ struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
+ unsigned long flags;
+
+ /*
+ * Note that if the struct devx_async_cmd_event_file uobj begins to be
+ * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
+ * routine returns, ensuring that it always remains valid here.
+ */
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ list_add_tail(&async_data->list, &ev_queue->event_list);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
+
+ wake_up_interruptible(&ev_queue->poll_wait);
+}
+
+#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
+ u16 cmd_out_len;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct ib_uobject *fd_uobj;
+ int err;
+ int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+ struct devx_async_cmd_event_file *ev_file;
+ struct devx_async_data *async_data;
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_query_cmd(cmd_in))
+ return -EINVAL;
+
+ err = uverbs_get_const(&cmd_out_len, attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
+ if (err)
+ return err;
+
+ if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
+ return -EINVAL;
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
+ uobj);
+
+ if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
+ MAX_ASYNC_BYTES_IN_USE) {
+ atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+ return -EAGAIN;
+ }
+
+ async_data = kvzalloc(struct_size(async_data, hdr.out_data,
+ cmd_out_len), GFP_KERNEL);
+ if (!async_data) {
+ err = -ENOMEM;
+ goto sub_bytes;
+ }
+
+ err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
+ if (err)
+ goto free_async;
+
+ async_data->cmd_out_len = cmd_out_len;
+ async_data->mdev = mdev;
+ async_data->ev_file = ev_file;
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
+ async_data->hdr.out_data,
+ async_data->cmd_out_len,
+ devx_query_callback, &async_data->cb_work);
+
+ if (err)
+ goto free_async;
+
+ return 0;
+
+free_async:
+ kvfree(async_data);
+sub_bytes:
+ atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+ return err;
+}
+
+static void
+subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ /* Level 1 is valid for future use, no need to free */
+ if (!is_level2)
+ return;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids,
+ key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
+static int
+subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_obj_event *obj_event;
+ struct devx_event *event;
+ int err;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ if (!event) {
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&event->unaffiliated_list);
+ xa_init_flags(&event->object_ids, 0);
+
+ err = xa_insert(&devx_event_table->event_xa,
+ key_level1,
+ event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(event);
+ return err;
+ }
+ }
+
+ if (!is_level2)
+ return 0;
+
+ obj_event = xa_load(&event->object_ids, key_level2);
+ if (!obj_event) {
+ obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
+ if (!obj_event)
+ /* Level1 is valid for future use, no need to free */
+ return -ENOMEM;
+
+ err = xa_insert(&event->object_ids,
+ key_level2,
+ obj_event,
+ GFP_KERNEL);
+ if (err)
+ return err;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
+ }
+
+ return 0;
+}
+
+static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (obj) {
+ if (!is_legacy_obj_event_num(event_type_num_list[i]))
+ return false;
+ } else if (!is_legacy_unaffiliated_event_num(
+ event_type_num_list[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#define MAX_SUPP_EVENT_NUM 255
+static bool is_valid_events(struct mlx5_core_dev *dev,
+ int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ __be64 *aff_events;
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+ int i;
+
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ aff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
+ user_affiliated_events);
+ unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ } else {
+ return is_valid_events_legacy(num_events, event_type_num_list,
+ obj);
+ }
+
+ for (i = 0; i < num_events; i++) {
+ if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
+ return false;
+
+ mask_entry = event_type_num_list[i] / 64;
+ mask_bit = event_type_num_list[i] % 64;
+
+ if (obj) {
+ /* CQ completion */
+ if (event_type_num_list[i] == 0)
+ continue;
+
+ if (!(be64_to_cpu(aff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+
+ continue;
+ }
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+ }
+
+ return true;
+}
+
+#define MAX_NUM_EVENTS 16
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ struct ib_uobject *fd_uobj;
+ struct devx_obj *obj = NULL;
+ struct devx_async_event_file *ev_file;
+ struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
+ u16 *event_type_num_list;
+ struct devx_event_subscription *event_sub, *tmp_sub;
+ struct list_head sub_list;
+ int redirect_fd;
+ bool use_eventfd = false;
+ int num_events;
+ int num_alloc_xa_entries = 0;
+ u16 obj_type = 0;
+ u64 cookie = 0;
+ u32 obj_id = 0;
+ int err;
+ int i;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!IS_ERR(devx_uobj)) {
+ obj = (struct devx_obj *)devx_uobj->object;
+ if (obj)
+ obj_id = get_dec_obj_id(obj->obj_id);
+ }
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_event_file,
+ uobj);
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
+ err = uverbs_copy_from(&redirect_fd, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
+ if (err)
+ return err;
+
+ use_eventfd = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
+ if (use_eventfd)
+ return -EINVAL;
+
+ err = uverbs_copy_from(&cookie, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
+ if (err)
+ return err;
+ }
+
+ num_events = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ sizeof(u16));
+
+ if (num_events < 0)
+ return num_events;
+
+ if (num_events > MAX_NUM_EVENTS)
+ return -EINVAL;
+
+ event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
+
+ if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&sub_list);
+
+ /* Protect from concurrent subscriptions to same XA entries to allow
+ * both to succeed
+ */
+ mutex_lock(&devx_event_table->event_xa_lock);
+ for (i = 0; i < num_events; i++) {
+ u32 key_level1;
+
+ if (obj)
+ obj_type = get_dec_obj_type(obj,
+ event_type_num_list[i]);
+ key_level1 = event_type_num_list[i] | obj_type << 16;
+
+ err = subscribe_event_xa_alloc(devx_event_table,
+ key_level1,
+ obj,
+ obj_id);
+ if (err)
+ goto err;
+
+ num_alloc_xa_entries++;
+ event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
+ if (!event_sub)
+ goto err;
+
+ list_add_tail(&event_sub->event_list, &sub_list);
+ uverbs_uobject_get(&ev_file->uobj);
+ if (use_eventfd) {
+ event_sub->eventfd =
+ fdget(redirect_fd);
+
+ if (event_sub->eventfd.file == NULL) {
+ err = -EBADF;
+ goto err;
+ }
+ }
+
+ event_sub->cookie = cookie;
+ event_sub->ev_file = ev_file;
+ /* May be needed upon cleanup the devx object/subscription */
+ event_sub->xa_key_level1 = key_level1;
+ event_sub->xa_key_level2 = obj_id;
+ INIT_LIST_HEAD(&event_sub->obj_list);
+ }
+
+ /* Once all the allocations and the XA data insertions were done we
+ * can go ahead and add all the subscriptions to the relevant lists
+ * without concern of a failure.
+ */
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+
+ list_del_init(&event_sub->event_list);
+
+ spin_lock_irq(&ev_file->lock);
+ list_add_tail_rcu(&event_sub->file_list,
+ &ev_file->subscribed_events_list);
+ spin_unlock_irq(&ev_file->lock);
+
+ event = xa_load(&devx_event_table->event_xa,
+ event_sub->xa_key_level1);
+ WARN_ON(!event);
+
+ if (!obj) {
+ list_add_tail_rcu(&event_sub->xa_list,
+ &event->unaffiliated_list);
+ continue;
+ }
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ WARN_ON(!obj_event);
+ list_add_tail_rcu(&event_sub->xa_list,
+ &obj_event->obj_sub_list);
+ list_add_tail_rcu(&event_sub->obj_list,
+ &obj->event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return 0;
+
+err:
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ list_del(&event_sub->event_list);
+
+ subscribe_event_xa_dealloc(devx_event_table,
+ event_sub->xa_key_level1,
+ obj,
+ obj_id);
+
+ if (event_sub->eventfd.file)
+ fdput(event_sub->eventfd);
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
+ kfree(event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return err;
+}
+
+static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
+ struct uverbs_attr_bundle *attrs,
+ struct devx_umem *obj)
+{
+ u64 addr;
+ size_t size;
+ u32 access;
+ int npages;
+ int err;
+ u32 page_mask;
+
+ if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
+ uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
+ return -EFAULT;
+
+ err = uverbs_get_flags32(&access, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+ if (err)
+ return err;
+
+ err = ib_check_mr_access(access);
+ if (err)
+ return err;
+
+ obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
+ if (IS_ERR(obj->umem))
+ return PTR_ERR(obj->umem);
+
+ mlx5_ib_cont_pages(obj->umem, obj->umem->address,
+ MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
+ &obj->page_shift, &obj->ncont, NULL);
+
+ if (!npages) {
+ ib_umem_release(obj->umem);
+ return -EINVAL;
+ }
+
+ page_mask = (1 << obj->page_shift) - 1;
+ obj->page_offset = obj->umem->address & page_mask;
+
+ return 0;
+}
+
+static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
+ struct devx_umem *obj,
+ struct devx_umem_reg_cmd *cmd)
+{
+ cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+ (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
+ cmd->in = uverbs_zalloc(attrs, cmd->inlen);
+ return PTR_ERR_OR_ZERO(cmd->in);
+}
+
+static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
+ struct devx_umem *obj,
+ struct devx_umem_reg_cmd *cmd)
+{
+ void *umem;
+ __be64 *mtt;
+
+ umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
+ mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
+
+ MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
+ MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
+ MLX5_SET(umem, umem, log_page_size, obj->page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(umem, umem, page_offset, obj->page_offset);
+ mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+ (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
+ MLX5_IB_MTT_READ);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct devx_umem_reg_cmd cmd;
+ struct devx_umem *obj;
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+ u32 obj_id;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ int err;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
+ if (err)
+ goto err_obj_free;
+
+ err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
+ if (err)
+ goto err_umem_release;
+
+ devx_umem_reg_cmd_build(dev, obj, &cmd);
+
+ MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
+ sizeof(cmd.out));
+ if (err)
+ goto err_umem_release;
+
+ obj->mdev = dev->mdev;
+ uobj->object = obj;
+ devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
+ if (err)
+ goto err_umem_destroy;
+
+ return 0;
+
+err_umem_destroy:
+ mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
+err_umem_release:
+ ib_umem_release(obj->umem);
+err_obj_free:
+ kfree(obj);
+ return err;
+}
+
+static int devx_umem_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct devx_umem *obj = uobject->object;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ int err;
+
+ err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+ if (ib_is_destroy_retryable(err, why, uobject))
+ return err;
+
+ ib_umem_release(obj->umem);
+ kfree(obj);
+ return 0;
+}
+
+static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
+ unsigned long event_type)
+{
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+
+ if (!MLX5_CAP_GEN(dev, event_cap))
+ return is_legacy_unaffiliated_event_num(event_type);
+
+ unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
+
+ mask_entry = event_type / 64;
+ mask_bit = event_type % 64;
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
+ return false;
+
+ return true;
+}
+
+static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
+{
+ struct mlx5_eqe *eqe = data;
+ u32 obj_id = 0;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ break;
+ default:
+ obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
+ break;
+ }
+
+ return obj_id;
+}
+
+static int deliver_event(struct devx_event_subscription *event_sub,
+ const void *data)
+{
+ struct devx_async_event_file *ev_file;
+ struct devx_async_event_data *event_data;
+ unsigned long flags;
+
+ ev_file = event_sub->ev_file;
+
+ if (ev_file->omit_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!list_empty(&event_sub->event_list) ||
+ ev_file->is_destroyed) {
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return 0;
+ }
+
+ list_add_tail(&event_sub->event_list, &ev_file->event_list);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+ return 0;
+ }
+
+ event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
+ GFP_ATOMIC);
+ if (!event_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ ev_file->is_overflow_err = 1;
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return -ENOMEM;
+ }
+
+ event_data->hdr.cookie = event_sub->cookie;
+ memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
+
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!ev_file->is_destroyed)
+ list_add_tail(&event_data->list, &ev_file->event_list);
+ else
+ kfree(event_data);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ return 0;
+}
+
+static void dispatch_event_fd(struct list_head *fd_list,
+ const void *data)
+{
+ struct devx_event_subscription *item;
+
+ list_for_each_entry_rcu(item, fd_list, xa_list) {
+ if (item->eventfd.file != NULL)
+ linux_poll_wakeup(item->eventfd.file);
+ else
+ deliver_event(item, data);
+ }
+}
+
+static bool mlx5_devx_event_notifier(struct mlx5_core_dev *mdev,
+ uint8_t event_type, void *data)
+{
+ struct mlx5_ib_dev *dev;
+ struct mlx5_devx_event_table *table;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u16 obj_type = 0;
+ bool is_unaffiliated;
+ u32 obj_id;
+
+ /* Explicit filtering to kernel events which may occur frequently */
+ if (event_type == MLX5_EVENT_TYPE_CMD ||
+ event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
+ return true;
+
+ dev = mdev->priv.eq_table.dev;
+ table = &dev->devx_event_table;
+ is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
+
+ if (!is_unaffiliated)
+ obj_type = get_event_obj_type(event_type, data);
+
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, event_type | (obj_type << 16));
+ if (!event) {
+ rcu_read_unlock();
+ return false;
+ }
+
+ if (is_unaffiliated) {
+ dispatch_event_fd(&event->unaffiliated_list, data);
+ rcu_read_unlock();
+ return true;
+ }
+
+ obj_id = devx_get_obj_id_from_event(event_type, data);
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event) {
+ rcu_read_unlock();
+ return false;
+ }
+
+ dispatch_event_fd(&obj_event->obj_sub_list, data);
+
+ rcu_read_unlock();
+ return true;
+}
+
+void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+
+ xa_init_flags(&table->event_xa, 0);
+ mutex_init(&table->event_xa_lock);
+ dev->mdev->priv.eq_table.dev = dev;
+ dev->mdev->priv.eq_table.cb = mlx5_devx_event_notifier;
+}
+
+void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ struct devx_event_subscription *sub, *tmp;
+ struct devx_event *event;
+ void *entry;
+ unsigned long id;
+
+ dev->mdev->priv.eq_table.cb = NULL;
+ dev->mdev->priv.eq_table.dev = NULL;
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
+ xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+}
+
+static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ struct devx_async_data *event;
+ int ret = 0;
+ size_t eventsz;
+
+ spin_lock_irq(&ev_queue->lock);
+
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(
+ ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
+ ev_queue->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
+ }
+
+ event = list_entry(ev_queue->event_list.next,
+ struct devx_async_data, list);
+ eventsz = event->cmd_out_len +
+ sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -ENOSPC;
+ }
+
+ list_del(ev_queue->event_list.next);
+ spin_unlock_irq(&ev_queue->lock);
+
+ if (copy_to_user(buf, &event->hdr, eventsz))
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
+ kvfree(event);
+ return ret;
+}
+
+static __poll_t devx_async_cmd_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_queue->poll_wait, wait);
+
+ spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed)
+ pollflags = POLLIN | POLLRDNORM | POLLHUP;
+ else if (!list_empty(&ev_queue->event_list))
+ pollflags = POLLIN | POLLRDNORM;
+ spin_unlock_irq(&ev_queue->lock);
+
+ return pollflags;
+}
+
+static const struct file_operations devx_async_cmd_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_cmd_event_read,
+ .poll = devx_async_cmd_event_poll,
+ .release = uverbs_uobject_fd_release,
+ .llseek = no_llseek,
+};
+
+static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub;
+ struct devx_async_event_data *uninitialized_var(event);
+ int ret = 0;
+ size_t eventsz;
+ bool omit_data;
+ void *event_data;
+
+ omit_data = ev_file->omit_data;
+
+ spin_lock_irq(&ev_file->lock);
+
+ if (ev_file->is_overflow_err) {
+ ev_file->is_overflow_err = 0;
+ spin_unlock_irq(&ev_file->lock);
+ return -EOVERFLOW;
+ }
+
+
+ while (list_empty(&ev_file->event_list)) {
+ spin_unlock_irq(&ev_file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(ev_file->poll_wait,
+ (!list_empty(&ev_file->event_list) ||
+ ev_file->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EIO;
+ }
+ }
+
+ if (omit_data) {
+ event_sub = list_first_entry(&ev_file->event_list,
+ struct devx_event_subscription,
+ event_list);
+ eventsz = sizeof(event_sub->cookie);
+ event_data = &event_sub->cookie;
+ } else {
+ event = list_first_entry(&ev_file->event_list,
+ struct devx_async_event_data, list);
+ eventsz = sizeof(struct mlx5_eqe) +
+ sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
+ event_data = &event->hdr;
+ }
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EINVAL;
+ }
+
+ if (omit_data)
+ list_del_init(&event_sub->event_list);
+ else
+ list_del(&event->list);
+
+ spin_unlock_irq(&ev_file->lock);
+
+ if (copy_to_user(buf, event_data, eventsz))
+ /* This points to an application issue, not a kernel concern */
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ if (!omit_data)
+ kfree(event);
+ return ret;
+}
+
+static __poll_t devx_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_file->poll_wait, wait);
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed)
+ pollflags = POLLIN | POLLRDNORM | POLLHUP;
+ else if (!list_empty(&ev_file->event_list))
+ pollflags = POLLIN | POLLRDNORM;
+ spin_unlock_irq(&ev_file->lock);
+
+ return pollflags;
+}
+
+static void devx_free_subscription(struct rcu_head *rcu)
+{
+ struct devx_event_subscription *event_sub =
+ container_of(rcu, struct devx_event_subscription, rcu);
+
+ if (event_sub->eventfd.file)
+ fdput(event_sub->eventfd);
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
+ kfree(event_sub);
+}
+
+static const struct file_operations devx_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_event_read,
+ .poll = devx_async_event_poll,
+ .release = uverbs_uobject_fd_release,
+ .llseek = no_llseek,
+};
+
+static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_cmd_event_file *comp_ev_file =
+ container_of(uobj, struct devx_async_cmd_event_file,
+ uobj);
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ struct devx_async_data *entry, *tmp;
+
+ spin_lock_irq(&ev_queue->lock);
+ ev_queue->is_destroyed = 1;
+ spin_unlock_irq(&ev_queue->lock);
+ wake_up_interruptible(&ev_queue->poll_wait);
+
+ mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
+
+ spin_lock_irq(&comp_ev_file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp,
+ &comp_ev_file->ev_queue.event_list, list) {
+ list_del(&entry->list);
+ kvfree(entry);
+ }
+ spin_unlock_irq(&comp_ev_file->ev_queue.lock);
+ return 0;
+};
+
+static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_event_file *ev_file =
+ container_of(uobj, struct devx_async_event_file,
+ uobj);
+ struct devx_event_subscription *event_sub, *event_sub_tmp;
+ struct mlx5_ib_dev *dev = ev_file->dev;
+
+ spin_lock_irq(&ev_file->lock);
+ ev_file->is_destroyed = 1;
+
+ /* free the pending events allocation */
+ if (ev_file->omit_data) {
+ struct devx_event_subscription *event_sub, *tmp;
+
+ list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
+ event_list)
+ list_del_init(&event_sub->event_list);
+
+ } else {
+ struct devx_async_event_data *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
+ list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+
+ spin_unlock_irq(&ev_file->lock);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ /* delete the subscriptions which are related to this FD */
+ list_for_each_entry_safe(event_sub, event_sub_tmp,
+ &ev_file->subscribed_events_list, file_list) {
+ devx_cleanup_subscription(dev, event_sub);
+ list_del_rcu(&event_sub->file_list);
+ /* subscription may not be used by the read API any more */
+ call_rcu(&event_sub->rcu, devx_free_subscription);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+
+ put_device(&dev->ib_dev.dev);
+ return 0;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_UMEM_REG,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ enum ib_access_flags),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_QUERY_EQN,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_QUERY_UAR,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OTHER,
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
+ UVERBS_IDR_ANY_OBJECT,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+ UVERBS_IDR_ANY_OBJECT,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+ UVERBS_IDR_ANY_OBJECT,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
+ u16, UA_MANDATORY),
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
+ devx_async_cmd_event_destroy_uobj,
+ &devx_async_cmd_event_fops, "[devx_async_cmd]",
+ FMODE_READ),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ enum mlx5_ib_uapi_devx_create_event_channel_flags,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
+ devx_async_event_destroy_uobj,
+ &devx_async_event_fops, "[devx_async_event]",
+ FMODE_READ),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
+
+static bool devx_is_supported(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
+}
+
+const struct uapi_definition mlx5_ib_devx_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ {},
+};
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2016-2020, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
@@ -45,6 +45,7 @@
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <dev/mlx5/fs.h>
@@ -1106,6 +1107,12 @@
return err;
}
+static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
+{
+ mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
+ caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
+}
+
static u16 calc_dynamic_bfregs(int uars_per_sys_page)
{
/* Large page with non 4k uar support might limit the dynamic size */
@@ -1194,55 +1201,70 @@
mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
}
-static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
+ u16 uid)
+{
+ int err;
+
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ return 0;
+
+ err = mlx5_alloc_transport_domain(dev->mdev, tdn, uid);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
+ u16 uid)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ return;
+
+ mlx5_dealloc_transport_domain(dev->mdev, tdn, uid);
+}
+
+static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = uctx->device;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
- struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
- size_t reqlen;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
bool lib_uar_4k;
bool lib_uar_dyn;
if (!dev->ib_active)
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
- if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
- return ERR_PTR(-EINVAL);
-
- reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
+ if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen >= min_req_v2)
+ else if (udata->inlen >= min_req_v2)
ver = 2;
else
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
- err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
+ err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
- return ERR_PTR(err);
+ return err;
- if (req.flags)
- return ERR_PTR(-EINVAL);
+ if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
+ return -EOPNOTSUPP;
if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
req.total_num_bfregs = ALIGN(req.total_num_bfregs,
MLX5_NON_FP_BFREGS_PER_UAR);
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
- return ERR_PTR(-EINVAL);
-
- if (reqlen > sizeof(req) &&
- !ib_is_udata_cleared(udata, sizeof(req),
- reqlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EINVAL;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
@@ -1263,10 +1285,6 @@
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
@@ -1303,19 +1321,18 @@
goto out_sys_pages;
uar_done:
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
-#endif
-
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_alloc_transport_domain(dev->mdev,
- &context->tdn);
- if (err)
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+ err = mlx5_ib_devx_create(dev, true);
+ if (err < 0)
goto out_uars;
+ context->devx_uid = err;
}
- INIT_LIST_HEAD(&context->vma_private_list);
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1360,17 +1377,21 @@
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_td;
+ goto out_mdev;
bfregi->ver = ver;
bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
context->cqe_version = resp.cqe_version;
+ context->lib_caps = req.lib_caps;
+ print_lib_caps(dev, context->lib_caps);
- return &context->ibucontext;
+ return 0;
-out_td:
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
+out_mdev:
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+out_devx:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
out_uars:
deallocate_uars(dev, context);
@@ -1382,26 +1403,24 @@
kfree(bfregi->count);
out_ctx:
- kfree(context);
- return ERR_PTR(err);
+ return err;
}
-static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
bfregi = &context->bfregi;
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+
+ if (context->devx_uid)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
kfree(bfregi->count);
- kfree(context);
-
- return 0;
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -1435,131 +1454,9 @@
return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
}
-static void mlx5_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA. This
- * is done through either mremap flow or split_vma (usually due to
- * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
- * as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original VMA size is exactly a single page, and therefore all
- * "splitting" operation will not happen to it.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx5_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
- * However need a sync with accessing the vma as part of
- * mlx5_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting.
- * The exiting case is handled explicitly as part of
- * mlx5_ib_disassociate_ucontext.
- */
- mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
-
- /* setting the vma context pointer to null in the mlx5_ib driver's
- * private data, to protect a race condition in
- * mlx5_ib_disassociate_ucontext().
- */
- mlx5_ib_vma_priv_data->vma = NULL;
- list_del(&mlx5_ib_vma_priv_data->list);
- kfree(mlx5_ib_vma_priv_data);
-}
-
-static const struct vm_operations_struct mlx5_ib_vm_ops = {
- .open = mlx5_ib_vma_open,
- .close = mlx5_ib_vma_close
-};
-
-static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx5_ib_ucontext *ctx)
-{
- struct mlx5_ib_vma_private_data *vma_prv;
- struct list_head *vma_head = &ctx->vma_private_list;
-
- vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
- if (!vma_prv)
- return -ENOMEM;
-
- vma_prv->vma = vma;
- vma->vm_private_data = vma_prv;
- vma->vm_ops = &mlx5_ib_vm_ops;
-
- list_add(&vma_prv->list, vma_head);
-
- return 0;
-}
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- int ret;
- struct vm_area_struct *vma;
- struct mlx5_ib_vma_private_data *vma_private, *n;
- struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process || owning_process->task_thread->
- td_proc->p_state == PRS_ZOMBIE) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the
- * task struct.
- */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
-
- /* need to protect from a race on closing the vma as part of
- * mlx5_ib_vma_close.
- */
- down_write(&owning_mm->mmap_sem);
- list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
- list) {
- vma = vma_private->vma;
- ret = zap_vma_ptes(vma, vma->vm_start,
- PAGE_SIZE);
- if (ret == -ENOTSUP) {
- if (bootverbose)
- WARN_ONCE(
- "%s: zap_vma_ptes not implemented for unmanaged mappings", __func__);
- } else {
- WARN(ret, "%s: zap_vma_ptes failed, error %d",
- __func__, -ret);
- }
- /* context going to be destroyed, should
- * not access ops any more.
- */
- /* XXXKIB vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); */
- vma->vm_ops = NULL;
- list_del(&vma_private->list);
- kfree(vma_private);
- }
- up_write(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -1576,6 +1473,39 @@
}
}
+static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct mlx5_ib_ucontext *context)
+{
+ if ((vma->vm_end - vma->vm_start != PAGE_SIZE) ||
+ !(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
+ return -EOPNOTSUPP;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ return -EOPNOTSUPP;
+}
+
+static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
+{
+ struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
+ struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_UAR_WC:
+ case MLX5_IB_MMAP_TYPE_UAR_NC:
+ mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
+ kfree(mentry);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
@@ -1587,7 +1517,7 @@
pgprot_t prot;
u32 bfreg_dyn_idx = 0;
u32 uar_index;
- int dyn_uar = (cmd == MLX5_IB_MMAP_WC_PAGE);
+ int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC);
int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
bfregi->num_static_sys_pages;
@@ -1610,6 +1540,7 @@
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
+ case MLX5_IB_MMAP_ALLOC_WC:
case MLX5_IB_MMAP_REGULAR_PAGE:
/* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
prot = pgprot_writecombine(vma->vm_page_prot);
@@ -1657,18 +1588,18 @@
pfn = uar_index2pfn(dev, uar_index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
- vma->vm_page_prot = prot;
- err = io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot);
+ err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
+ prot, NULL);
if (err) {
- mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%llx, pfn=%pa, mmap_cmd=%s\n",
- err, (unsigned long long)vma->vm_start, &pfn, mmap_cmd2str(cmd));
+ mlx5_ib_err(dev,
+ "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
+ err, mmap_cmd2str(cmd));
goto err;
}
if (dyn_uar)
bfregi->sys_pages[idx] = uar_index;
- return mlx5_ib_set_vma_data(vma, context);
+ return 0;
err:
if (!dyn_uar)
@@ -1682,6 +1613,48 @@
return err;
}
+static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
+{
+ unsigned long idx;
+ u8 command;
+
+ command = get_command(vma->vm_pgoff);
+ idx = get_extended_index(vma->vm_pgoff);
+
+ return (command << 16 | idx);
+}
+
+static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct ib_ucontext *ucontext)
+{
+ struct mlx5_user_mmap_entry *mentry;
+ struct rdma_user_mmap_entry *entry;
+ unsigned long pgoff;
+ pgprot_t prot;
+ phys_addr_t pfn;
+ int ret;
+
+ pgoff = mlx5_vma_to_pgoff(vma);
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
+ if (!entry)
+ return -EINVAL;
+
+ mentry = to_mmmap(entry);
+ pfn = (mentry->address >> PAGE_SHIFT);
+ if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR ||
+ mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC)
+ prot = pgprot_noncached(vma->vm_page_prot);
+ else
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ ret = rdma_user_mmap_io(ucontext, vma, pfn,
+ entry->npages * PAGE_SIZE,
+ prot,
+ entry);
+ rdma_user_mmap_entry_put(&mentry->rdma_entry);
+ return ret;
+}
+
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -1692,6 +1665,10 @@
command = get_command(vma->vm_pgoff);
switch (command) {
case MLX5_IB_MMAP_WC_PAGE:
+ case MLX5_IB_MMAP_ALLOC_WC:
+ if (!dev->wc_support)
+ return -EPERM;
+ /* FALLTHROUGH */
case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
return uar_mmap(dev, command, vma, context);
@@ -1710,65 +1687,55 @@
if (PAGE_SIZE > 4096)
return -EOPNOTSUPP;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pfn = (dev->mdev->iseg_base +
offsetof(struct mlx5_init_seg, internal_timer_h)) >>
PAGE_SHIFT;
- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx5_ib_dbg(dev, "mapped internal timer at 0x%llx, PA 0x%llx\n",
- (unsigned long long)vma->vm_start,
- (unsigned long long)pfn << PAGE_SHIFT);
- break;
+ return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
+ case MLX5_IB_MMAP_CLOCK_INFO:
+ return mlx5_ib_mmap_clock_info_page(dev, vma, context);
default:
- return -EINVAL;
+ return mlx5_ib_mmap_offset(dev, vma, ibcontext);
}
return 0;
}
-static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
+ struct mlx5_ib_pd *pd = to_mpd(ibpd);
+ struct ib_device *ibdev = ibpd->device;
struct mlx5_ib_alloc_pd_resp resp;
- struct mlx5_ib_pd *pd;
int err;
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+ u16 uid = context ? context->devx_uid : 0;
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
- if (err) {
- kfree(pd);
- return ERR_PTR(err);
- }
+ err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn, uid);
+ if (err)
+ return (err);
- if (context) {
+ pd->uid = uid;
+ if (udata) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
- kfree(pd);
- return ERR_PTR(-EFAULT);
+ mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
+ return -EFAULT;
}
}
- return &pd->ibpd;
+ return 0;
}
-static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
+static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
- kfree(mpd);
-
- return 0;
+ mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
enum {
@@ -2358,7 +2325,8 @@
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain)
+ int domain,
+ struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
@@ -2372,6 +2340,7 @@
return ERR_PTR(-ENOSPC);
if (domain != IB_FLOW_DOMAIN_USER ||
+ udata != NULL ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
(flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
return ERR_PTR(-EINVAL);
@@ -2734,9 +2703,12 @@
if (err)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
- mlx5_ib_destroy_qp(dev->umrc.qp);
- ib_free_cq(dev->umrc.cq);
- ib_dealloc_pd(dev->umrc.pd);
+ if (dev->umrc.qp)
+ mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
+ if (dev->umrc.cq)
+ ib_free_cq(dev->umrc.cq);
+ if (dev->umrc.pd)
+ ib_dealloc_pd(dev->umrc.pd);
}
enum {
@@ -2835,13 +2807,16 @@
return 0;
error_4:
- mlx5_ib_destroy_qp(qp);
+ mlx5_ib_destroy_qp(qp, NULL);
+ dev->umrc.qp = NULL;
error_3:
ib_free_cq(cq);
+ dev->umrc.cq = NULL;
error_2:
ib_dealloc_pd(pd);
+ dev->umrc.pd = NULL;
error_0:
kfree(attr);
@@ -2853,36 +2828,42 @@
{
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
+ struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
int port;
int ret = 0;
dev = container_of(devr, struct mlx5_ib_dev, devr);
+ ibdev = &dev->ib_dev;
mutex_init(&devr->mutex);
- devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
- if (IS_ERR(devr->p0)) {
- ret = PTR_ERR(devr->p0);
- goto error0;
- }
- devr->p0->device = &dev->ib_dev;
+ devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
+ if (!devr->p0)
+ return -ENOMEM;
+
+ devr->p0->device = ibdev;
devr->p0->uobject = NULL;
atomic_set(&devr->p0->usecnt, 0);
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
+ ret = mlx5_ib_alloc_pd(devr->p0, NULL);
+ if (ret)
+ goto error0;
+
+ devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
+ if (!devr->c0) {
+ ret = -ENOMEM;
goto error1;
}
- devr->c0->device = &dev->ib_dev;
- devr->c0->uobject = NULL;
- devr->c0->comp_handler = NULL;
- devr->c0->event_handler = NULL;
- devr->c0->cq_context = NULL;
+
+ devr->c0->device = &dev->ib_dev;
atomic_set(&devr->c0->usecnt, 0);
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+ ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
+ if (ret)
+ goto err_create_cq;
+
+ devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
if (IS_ERR(devr->x0)) {
ret = PTR_ERR(devr->x0);
goto error2;
@@ -2893,7 +2874,7 @@
mutex_init(&devr->x0->tgt_qp_mutex);
INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+ devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
if (IS_ERR(devr->x1)) {
ret = PTR_ERR(devr->x1);
goto error3;
@@ -2908,24 +2889,26 @@
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
- attr.ext.xrc.cq = devr->c0;
+ attr.ext.cq = devr->c0;
attr.ext.xrc.xrcd = devr->x0;
- devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
- if (IS_ERR(devr->s0)) {
- ret = PTR_ERR(devr->s0);
+ devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
+ if (!devr->s0) {
+ ret = -ENOMEM;
goto error4;
}
+
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
- devr->s0->uobject = NULL;
- devr->s0->event_handler = NULL;
- devr->s0->srq_context = NULL;
devr->s0->srq_type = IB_SRQT_XRC;
devr->s0->ext.xrc.xrcd = devr->x0;
- devr->s0->ext.xrc.cq = devr->c0;
+ devr->s0->ext.cq = devr->c0;
+ ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
+ if (ret)
+ goto err_create;
+
atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
- atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
+ atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
@@ -2933,20 +2916,23 @@
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_BASIC;
- devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
- if (IS_ERR(devr->s1)) {
- ret = PTR_ERR(devr->s1);
+ devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq);
+ if (!devr->s1) {
+ ret = -ENOMEM;
goto error5;
}
+
devr->s1->device = &dev->ib_dev;
devr->s1->pd = devr->p0;
- devr->s1->uobject = NULL;
- devr->s1->event_handler = NULL;
- devr->s1->srq_context = NULL;
devr->s1->srq_type = IB_SRQT_BASIC;
- devr->s1->ext.xrc.cq = devr->c0;
+ devr->s1->ext.cq = devr->c0;
+
+ ret = mlx5_ib_create_srq(devr->s1, &attr, NULL);
+ if (ret)
+ goto error6;
+
atomic_inc(&devr->p0->usecnt);
- atomic_set(&devr->s0->usecnt, 0);
+ atomic_set(&devr->s1->usecnt, 0);
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
INIT_WORK(&devr->ports[port].pkey_change_work,
@@ -2956,35 +2942,44 @@
return 0;
+error6:
+ kfree(devr->s1);
error5:
- mlx5_ib_destroy_srq(devr->s0);
+ mlx5_ib_destroy_srq(devr->s0, NULL);
+err_create:
+ kfree(devr->s0);
error4:
- mlx5_ib_dealloc_xrcd(devr->x1);
+ mlx5_ib_dealloc_xrcd(devr->x1, NULL);
error3:
- mlx5_ib_dealloc_xrcd(devr->x0);
+ mlx5_ib_dealloc_xrcd(devr->x0, NULL);
error2:
- mlx5_ib_destroy_cq(devr->c0);
+ mlx5_ib_destroy_cq(devr->c0, NULL);
+err_create_cq:
+ kfree(devr->c0);
error1:
- mlx5_ib_dealloc_pd(devr->p0);
+ mlx5_ib_dealloc_pd(devr->p0, NULL);
error0:
+ kfree(devr->p0);
return ret;
}
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
- struct mlx5_ib_dev *dev =
- container_of(devr, struct mlx5_ib_dev, devr);
int port;
- mlx5_ib_destroy_srq(devr->s1);
- mlx5_ib_destroy_srq(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0);
- mlx5_ib_dealloc_xrcd(devr->x1);
- mlx5_ib_destroy_cq(devr->c0);
- mlx5_ib_dealloc_pd(devr->p0);
+ mlx5_ib_destroy_srq(devr->s1, NULL);
+ kfree(devr->s1);
+ mlx5_ib_destroy_srq(devr->s0, NULL);
+ kfree(devr->s0);
+ mlx5_ib_dealloc_xrcd(devr->x0, NULL);
+ mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_ib_destroy_cq(devr->c0, NULL);
+ kfree(devr->c0);
+ mlx5_ib_dealloc_pd(devr->p0, NULL);
+ kfree(devr->p0);
/* Make sure no change P_Key work items are still executing */
- for (port = 0; port < dev->num_ports; ++port)
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
cancel_work_sync(&devr->ports[port].pkey_change_work);
}
@@ -3296,6 +3291,7 @@
MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
+ INIT_IB_DEVICE_OPS(&dev->ib_dev.ops, mlx5, MLX5);
snprintf(dev->ib_dev.name, IB_DEVICE_NAME_MAX, "mlx5_%d", device_get_unit(mdev->pdev->dev.bsddev));
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
@@ -3353,6 +3349,7 @@
dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
dev->ib_dev.mmap = mlx5_ib_mmap;
+ dev->ib_dev.mmap_free = mlx5_ib_mmap_free;
dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
dev->ib_dev.create_ah = mlx5_ib_create_ah;
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -1303,7 +1303,7 @@
return 0;
}
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1344,7 +1344,7 @@
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
- u32 max_num_sg)
+ u32 max_num_sg, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
@@ -1389,7 +1389,7 @@
goto err_free_in;
mr->desc_size = sizeof(struct mlx5_klm);
mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
+ } else if (mr_type == IB_MR_TYPE_INTEGRITY) {
u32 psv_index[2];
MLX5_SET(mkc, mkc, bsf_en, 1);
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
@@ -29,6 +29,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
#include "mlx5_ib.h"
/* not supported currently */
@@ -679,11 +680,15 @@
return err;
}
-static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
+static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq,
+ struct ib_udata *udata)
{
- struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx5_ib_ucontext,
+ ibucontext);
- context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &rwq->db);
if (rwq->umem)
ib_umem_release(rwq->umem);
@@ -770,6 +775,7 @@
__be64 *pas;
void *qpc;
int err;
+ u16 uid;
u32 uar_flags;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -837,6 +843,9 @@
goto err_umem;
}
+ uid = (attr->qp_type != IB_QPT_XRC_TGT &&
+ attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
+ MLX5_SET(create_qp_in, *in, uid, uid);
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -885,11 +894,15 @@
}
static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp,
- struct mlx5_ib_qp_base *base)
+ struct mlx5_ib_qp_base *base,
+ struct ib_udata *udata)
{
- struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx5_ib_ucontext,
+ ibucontext);
- context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
if (base->ubuffer.umem)
ib_umem_release(base->ubuffer.umem);
@@ -1045,19 +1058,21 @@
}
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
- struct mlx5_ib_sq *sq, u32 tdn)
+ struct mlx5_ib_sq *sq, u32 tdn,
+ struct ib_pd *pd)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+ MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
}
static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
- struct mlx5_ib_sq *sq)
+ struct mlx5_ib_sq *sq, struct ib_pd *pd)
{
- mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+ mlx5_core_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
}
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
@@ -1093,6 +1108,7 @@
goto err_umem;
}
+ MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
@@ -1154,7 +1170,8 @@
}
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, void *qpin)
+ struct mlx5_ib_rq *rq, void *qpin,
+ struct ib_pd *pd)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
@@ -1175,6 +1192,7 @@
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, vlan_strip_disable, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE);
@@ -1216,7 +1234,8 @@
}
static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, u32 tdn)
+ struct mlx5_ib_rq *rq, u32 tdn,
+ struct ib_pd *pd)
{
u32 *in;
void *tirc;
@@ -1228,6 +1247,7 @@
if (!in)
return -ENOMEM;
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
@@ -1241,9 +1261,10 @@
}
static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq)
+ struct mlx5_ib_rq *rq,
+ struct ib_pd *pd)
{
- mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+ mlx5_core_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
@@ -1260,7 +1281,7 @@
u32 tdn = mucontext->tdn;
if (qp->sq.wqe_cnt) {
- err = create_raw_packet_qp_tis(dev, sq, tdn);
+ err = create_raw_packet_qp_tis(dev, sq, tdn, pd);
if (err)
return err;
@@ -1274,12 +1295,12 @@
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
- err = create_raw_packet_qp_rq(dev, rq, in);
+ err = create_raw_packet_qp_rq(dev, rq, in, pd);
if (err)
goto err_destroy_sq;
- err = create_raw_packet_qp_tir(dev, rq, tdn);
+ err = create_raw_packet_qp_tir(dev, rq, tdn, pd);
if (err)
goto err_destroy_rq;
}
@@ -1296,7 +1317,7 @@
return err;
destroy_raw_packet_qp_sq(dev, sq);
err_destroy_tis:
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, pd);
return err;
}
@@ -1309,13 +1330,13 @@
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
if (qp->rq.wqe_cnt) {
- destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_tir(dev, rq, qp->ibqp.pd);
destroy_raw_packet_qp_rq(dev, rq);
}
if (qp->sq.wqe_cnt) {
destroy_raw_packet_qp_sq(dev, sq);
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
}
}
@@ -1333,7 +1354,8 @@
static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
- mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
+ mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+ to_mpd(qp->ibqp.pd)->uid);
}
static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
@@ -1405,6 +1427,7 @@
if (!in)
return -ENOMEM;
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_INDIRECT);
@@ -1854,7 +1877,7 @@
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(dev, pd, qp, base);
+ destroy_qp_user(dev, pd, qp, base, udata);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
@@ -1964,7 +1987,8 @@
const struct mlx5_modify_raw_qp_param *raw_qp_param,
u8 lag_tx_affinity);
-static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_udata *udata)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
@@ -2033,7 +2057,7 @@
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base);
+ destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -2163,7 +2187,7 @@
return &qp->ibqp;
}
-int mlx5_ib_destroy_qp(struct ib_qp *qp)
+int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
@@ -2171,7 +2195,7 @@
if (unlikely(qp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_destroy_qp(qp);
- destroy_qp_common(dev, mqp);
+ destroy_qp_common(dev, mqp, udata);
kfree(mqp);
@@ -2244,7 +2268,8 @@
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 sl)
+ struct mlx5_ib_sq *sq, u8 sl,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2257,6 +2282,7 @@
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
@@ -2269,7 +2295,8 @@
}
static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 tx_affinity)
+ struct mlx5_ib_sq *sq, u8 tx_affinity,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2282,6 +2309,7 @@
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
@@ -2362,7 +2390,7 @@
if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
return modify_raw_packet_eth_prio(dev->mdev,
&qp->raw_packet_qp.sq,
- ah->sl & 0xf);
+ ah->sl & 0xf, qp->ibqp.pd);
return 0;
}
@@ -2510,9 +2538,9 @@
return result;
}
-static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, int new_state,
- const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_rq(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
void *in;
void *rqc;
@@ -2526,6 +2554,7 @@
MLX5_SET(modify_rq_in, in, rqn, rq->base.mqp.qpn);
MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(rqc, rqc, state, new_state);
@@ -2552,7 +2581,8 @@
}
static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, int new_state)
+ struct mlx5_ib_sq *sq, int new_state,
+ struct ib_pd *pd)
{
void *in;
void *sqc;
@@ -2565,6 +2595,7 @@
return -ENOMEM;
MLX5_SET(modify_sq_in, in, sqn, sq->base.mqp.qpn);
+ MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(modify_sq_in, in, sq_state, sq->state);
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
@@ -2588,6 +2619,8 @@
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int modify_rq = !!qp->rq.wqe_cnt;
+ int modify_sq = !!qp->sq.wqe_cnt;
int rq_state;
int sq_state;
int err;
@@ -2605,10 +2638,11 @@
rq_state = MLX5_RQC_STATE_RST;
sq_state = MLX5_SQC_STATE_RST;
break;
- case MLX5_CMD_OP_INIT2INIT_QP:
- case MLX5_CMD_OP_INIT2RTR_QP:
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
+ return raw_qp_param->set_mask ? -EINVAL : 0;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
if (raw_qp_param->set_mask)
return -EINVAL;
else
@@ -2618,21 +2652,23 @@
return -EINVAL;
}
- if (qp->rq.wqe_cnt) {
- err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+ if (modify_rq) {
+ err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
+ qp->ibqp.pd);
if (err)
return err;
}
- if (qp->sq.wqe_cnt) {
+ if (modify_sq) {
if (tx_affinity) {
err = modify_raw_packet_tx_affinity(dev->mdev, sq,
- tx_affinity);
+ tx_affinity,
+ qp->ibqp.pd);
if (err)
return err;
}
- return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, qp->ibqp.pd);
}
return 0;
@@ -3815,10 +3851,10 @@
return 0;
}
-static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq, bool send_signaled, bool solicited)
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ const struct ib_send_wr *wr, unsigned *idx,
+ int *size, int nreq, int send_flags)
{
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
return -ENOMEM;
@@ -3829,8 +3865,10 @@
*(uint32_t *)(*seg + 8) = 0;
(*ctrl)->imm = send_ieth(wr);
(*ctrl)->fm_ce_se = qp->sq_signal_bits |
- (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
- (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
+ (send_flags & IB_SEND_SIGNALED ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+ (send_flags & IB_SEND_SOLICITED ?
+ MLX5_WQE_CTRL_SOLICITED : 0);
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
@@ -3838,16 +3876,6 @@
return 0;
}
-static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq)
-{
- return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
- wr->send_flags & IB_SEND_SIGNALED,
- wr->send_flags & IB_SEND_SOLICITED);
-}
-
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
u8 size, unsigned idx, u64 wr_id,
@@ -3929,7 +3957,7 @@
goto out;
}
- err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq, wr->send_flags);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4001,8 +4029,8 @@
* SET_PSV WQEs are not signaled and solicited
* on error
*/
- err = __begin_wqe(qp, &seg, &ctrl, wr,
- &idx, &size, nreq, false, true);
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq, IB_SEND_SOLICITED);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4022,8 +4050,8 @@
finish_wqe(qp, ctrl, size, idx, wr->wr_id,
nreq, get_fence(fence, wr),
next_fence, MLX5_OPCODE_SET_PSV);
- err = __begin_wqe(qp, &seg, &ctrl, wr,
- &idx, &size, nreq, false, true);
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq, wr->send_flags);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4610,8 +4638,7 @@
}
struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+ struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_xrcd *xrcd;
@@ -4633,20 +4660,17 @@
return &xrcd->ibxrcd;
}
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
int err;
err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
- if (err) {
+ if (err)
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
- return err;
- }
kfree(xrcd);
-
return 0;
}
@@ -4690,6 +4714,7 @@
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, mem_rq_type,
MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE);
@@ -4842,22 +4867,20 @@
err_copy:
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
err_user_rq:
- destroy_user_rq(pd, rwq);
+ destroy_user_rq(pd, rwq, udata);
err:
kfree(rwq);
return ERR_PTR(err);
}
-int mlx5_ib_destroy_wq(struct ib_wq *wq)
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
- destroy_user_rq(wq->pd, rwq);
+ destroy_user_rq(wq->pd, rwq, udata);
kfree(rwq);
-
- return 0;
}
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
@@ -4911,6 +4934,9 @@
for (i = 0; i < sz; i++)
MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
+ rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
+ MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
+
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
@@ -4929,7 +4955,7 @@
return &rwq_ind_tbl->ib_rwq_ind_tbl;
err_copy:
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+ mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
err:
kfree(rwq_ind_tbl);
return ERR_PTR(err);
@@ -4940,7 +4966,7 @@
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+ mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
kfree(rwq_ind_tbl);
return 0;
@@ -4992,6 +5018,7 @@
if (wq_state == IB_WQS_ERR)
wq_state = MLX5_RQC_STATE_ERR;
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
MLX5_SET(rqc, rqc, state, wq_state);
err = mlx5_core_modify_rq(dev->mdev, in, inlen);
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,12 +31,10 @@
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
#include "mlx5_ib.h"
-/* not supported currently */
-static int srq_signature;
-
static void *get_wqe(struct mlx5_ib_srq *srq, int n)
{
return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
@@ -73,6 +71,8 @@
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd = {};
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
size_t ucmdlen;
int err;
int npages;
@@ -96,17 +96,15 @@
udata->inlen - sizeof(ucmd)))
return -EINVAL;
- if (in->type == IB_SRQT_XRC) {
- err = get_srq_user_index(to_mucontext(pd->uobject->context),
- &ucmd, udata->inlen, &uidx);
+ if (in->type != IB_SRQT_BASIC) {
+ err = get_srq_user_index(ucontext, &ucmd, udata->inlen, &uidx);
if (err)
return err;
}
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
- srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
- 0, 0);
+ srq->umem = ib_umem_get(&ucontext->ibucontext, ucmd.buf_addr, buf_size, 0, 0);
if (IS_ERR(srq->umem)) {
mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
err = PTR_ERR(srq->umem);
@@ -130,8 +128,7 @@
mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
- err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
- ucmd.db_addr, &srq->db);
+ err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db);
if (err) {
mlx5_ib_dbg(dev, "map doorbell failed\n");
goto err_in;
@@ -139,9 +136,11 @@
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
+ in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
- in->type == IB_SRQT_XRC)
+ in->type != IB_SRQT_BASIC)
in->user_index = uidx;
+
return 0;
err_in:
@@ -192,16 +191,14 @@
srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
- mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
- (unsigned long)(srq->msrq.max * sizeof(u64)));
err = -ENOMEM;
goto err_in;
}
- srq->wq_sig = !!srq_signature;
+ srq->wq_sig = 0;
in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
- in->type == IB_SRQT_XRC)
+ in->type != IB_SRQT_BASIC)
in->user_index = MLX5_IB_DEFAULT_UIDX;
return 0;
@@ -217,9 +214,15 @@
return err;
}
-static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
+static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+ struct ib_udata *udata)
{
- mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+ mlx5_ib_db_unmap_user(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx5_ib_ucontext,
+ ibucontext),
+ &srq->db);
ib_umem_release(srq->umem);
}
@@ -231,16 +234,16 @@
mlx5_db_free(dev->mdev, &srq->db);
}
-struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_ib_srq *srq;
+ struct mlx5_ib_dev *dev = to_mdev(ib_srq->device);
+ struct mlx5_ib_srq *srq = to_msrq(ib_srq);
size_t desc_size;
size_t buf_size;
int err;
- struct mlx5_srq_attr in = {0};
+ struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
/* Sanity check SRQ size before proceeding */
@@ -248,13 +251,9 @@
mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
init_attr->attr.max_wr,
max_srq_wqes);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
- srq = kmalloc(sizeof(*srq), GFP_KERNEL);
- if (!srq)
- return ERR_PTR(-ENOMEM);
-
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
@@ -262,50 +261,50 @@
desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
- if (desc_size == 0 || srq->msrq.max_gs > desc_size) {
- err = -EINVAL;
- goto err_srq;
- }
+ if (desc_size == 0 || srq->msrq.max_gs > desc_size)
+ return -EINVAL;
+
desc_size = roundup_pow_of_two(desc_size);
desc_size = max_t(size_t, 32, desc_size);
- if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) {
- err = -EINVAL;
- goto err_srq;
- }
+ if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg))
+ return -EINVAL;
+
srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
sizeof(struct mlx5_wqe_data_seg);
srq->msrq.wqe_shift = ilog2(desc_size);
buf_size = srq->msrq.max * desc_size;
- if (buf_size < desc_size) {
- err = -EINVAL;
- goto err_srq;
- }
+ if (buf_size < desc_size)
+ return -EINVAL;
+
in.type = init_attr->srq_type;
- if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size);
+ if (udata)
+ err = create_srq_user(ib_srq->pd, srq, &in, udata, buf_size);
else
err = create_srq_kernel(dev, srq, &in, buf_size);
- if (err || !in.pas) {
+ if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
- pd->uobject ? "user" : "kernel", err);
- goto err_srq;
+ udata ? "user" : "kernel", err);
+ return err;
}
in.log_size = ilog2(srq->msrq.max);
in.wqe_shift = srq->msrq.wqe_shift - 4;
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
- if (init_attr->srq_type == IB_SRQT_XRC) {
+
+ if (init_attr->srq_type == IB_SRQT_XRC)
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
- in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
- } else if (init_attr->srq_type == IB_SRQT_BASIC) {
+ else
in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
+
+ if (ib_srq_has_cq(init_attr->srq_type))
+ in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn;
+ else
in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
- }
- in.pd = to_mpd(pd)->pdn;
+ in.pd = to_mpd(ib_srq->pd)->pdn;
in.db_record = srq->db.dma;
err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
kvfree(in.pas);
@@ -319,7 +318,7 @@
srq->msrq.event = mlx5_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
+ if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
mlx5_ib_dbg(dev, "copy to user failed\n");
err = -EFAULT;
@@ -328,21 +327,18 @@
init_attr->attr.max_wr = srq->msrq.max - 1;
- return &srq->ibsrq;
+ return 0;
err_core:
mlx5_core_destroy_srq(dev->mdev, &srq->msrq);
err_usr_kern_srq:
- if (pd->uobject)
- destroy_srq_user(pd, srq);
+ if (udata)
+ destroy_srq_user(ib_srq->pd, srq, udata);
else
destroy_srq_kernel(dev, srq);
-err_srq:
- kfree(srq);
-
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
@@ -395,7 +391,7 @@
return ret;
}
-int mlx5_ib_destroy_srq(struct ib_srq *srq)
+void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
@@ -403,14 +399,16 @@
mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);
if (srq->uobject) {
- mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+ mlx5_ib_db_unmap_user(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx5_ib_ucontext,
+ ibucontext),
+ &msrq->db);
ib_umem_release(msrq->umem);
} else {
destroy_srq_kernel(dev, msrq);
}
-
- kfree(srq);
- return 0;
}
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h
--- a/sys/dev/mlx5/mlx5_ifc.h
+++ b/sys/dev/mlx5/mlx5_ifc.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,6 +31,7 @@
#include <dev/mlx5/mlx5_fpga/mlx5_ifc_fpga.h>
enum {
+ MLX5_EVENT_TYPE_NOTIFY_ANY = 0x0,
MLX5_EVENT_TYPE_COMP = 0x0,
MLX5_EVENT_TYPE_PATH_MIG = 0x1,
MLX5_EVENT_TYPE_COMM_EST = 0x2,
@@ -51,6 +52,7 @@
MLX5_EVENT_TYPE_GPIO_EVENT = 0x15,
MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT = 0x16,
MLX5_EVENT_TYPE_TEMP_WARN_EVENT = 0x17,
+ MLX5_EVENT_TYPE_XRQ_ERROR = 0x18,
MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19,
MLX5_EVENT_TYPE_CODING_DCBX_CHANGE_EVENT = 0x1e,
MLX5_EVENT_TYPE_CODING_PPS_EVENT = 0x25,
@@ -83,6 +85,24 @@
MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
};
+enum {
+ MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b,
+ MLX5_OBJ_TYPE_MKEY = 0xff01,
+ MLX5_OBJ_TYPE_QP = 0xff02,
+ MLX5_OBJ_TYPE_PSV = 0xff03,
+ MLX5_OBJ_TYPE_RMP = 0xff04,
+ MLX5_OBJ_TYPE_XRC_SRQ = 0xff05,
+ MLX5_OBJ_TYPE_RQ = 0xff06,
+ MLX5_OBJ_TYPE_SQ = 0xff07,
+ MLX5_OBJ_TYPE_TIR = 0xff08,
+ MLX5_OBJ_TYPE_TIS = 0xff09,
+ MLX5_OBJ_TYPE_DCT = 0xff0a,
+ MLX5_OBJ_TYPE_XRQ = 0xff0b,
+ MLX5_OBJ_TYPE_RQT = 0xff0e,
+ MLX5_OBJ_TYPE_FLOW_COUNTER = 0xff0f,
+ MLX5_OBJ_TYPE_CQ = 0xff10,
+};
+
enum {
MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
MLX5_CMD_OP_QUERY_ADAPTER = 0x101,
@@ -140,6 +160,16 @@
MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714,
MLX5_CMD_OP_SET_DC_CNAK_TRACE = 0x715,
MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716,
+ MLX5_CMD_OP_CREATE_XRQ = 0x717,
+ MLX5_CMD_OP_DESTROY_XRQ = 0x718,
+ MLX5_CMD_OP_QUERY_XRQ = 0x719,
+ MLX5_CMD_OP_ARM_XRQ = 0x71a,
+ MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725,
+ MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726,
+ MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727,
+ MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729,
+ MLX5_CMD_OP_MODIFY_XRQ = 0x72a,
+
MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750,
MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751,
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
@@ -246,8 +276,12 @@
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a,
MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c,
- MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d,
- MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e,
+ MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
+ MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f,
+ MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+ MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
MLX5_CMD_OP_FPGA_CREATE_QP = 0x960,
MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961,
MLX5_CMD_OP_FPGA_QUERY_QP = 0x962,
@@ -257,7 +291,16 @@
MLX5_CMD_OP_MODIFY_GENERAL_OBJ = 0xa01,
MLX5_CMD_OP_QUERY_GENERAL_OBJ = 0xa02,
MLX5_CMD_OP_DESTROY_GENERAL_OBJ = 0xa03,
+ MLX5_CMD_OP_CREATE_UCTX = 0xa04,
+ MLX5_CMD_OP_DESTROY_UCTX = 0xa06,
+ MLX5_CMD_OP_CREATE_UMEM = 0xa08,
+ MLX5_CMD_OP_DESTROY_UMEM = 0xa0a,
+};
+/* Valid range for general commands that don't work over an object */
+enum {
+ MLX5_CMD_OP_GENERAL_START = 0xb00,
+ MLX5_CMD_OP_GENERAL_END = 0xd00,
};
enum {
@@ -947,6 +990,12 @@
u8 reserved_6[0x700];
};
+struct mlx5_ifc_device_event_cap_bits {
+ u8 user_affiliated_events[4][0x40];
+
+ u8 user_unaffiliated_events[4][0x40];
+};
+
enum {
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x1,
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2,
@@ -1043,6 +1092,11 @@
MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_ENABLED = 0x3,
};
+enum {
+ MLX5_UCTX_CAP_RAW_TX = 1UL << 0,
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
+};
+
enum {
MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING = 0x0,
MLX5_SQ_TIMESTAMP_FORMAT_CAP_REAL_TIME = 0x1,
@@ -1060,7 +1114,8 @@
u8 log_max_srq_sz[0x8];
u8 log_max_qp_sz[0x8];
- u8 reserved_1[0xb];
+ u8 event_cap[0x1];
+ u8 reserved_1[0xa];
u8 log_max_qp[0x5];
u8 reserved_2[0xb];
@@ -1338,7 +1393,14 @@
u8 cqe_compression_timeout[0x10];
u8 cqe_compression_max_num[0x10];
- u8 reserved_69[0x220];
+ u8 reserved_5e0[0xc0];
+
+ u8 uctx_cap[0x20];
+
+ u8 reserved_6c0[0xc0];
+
+ u8 vhca_tunnel_commands[0x40];
+ u8 reserved_at_7c0[0x40];
};
enum mlx5_flow_destination_type {
@@ -1436,7 +1498,9 @@
u8 reserved_6[0x3];
u8 log_wq_sz[0x5];
- u8 reserved_7[0x15];
+ u8 dbr_umem_valid[0x1];
+ u8 wq_umem_valid[0x1];
+ u8 reserved_7[0x13];
u8 single_wqe_log_num_of_strides[0x3];
u8 two_byte_shift_en[0x1];
u8 reserved_8[0x4];
@@ -2117,20 +2181,10 @@
u8 dc_access_key[0x40];
- u8 rdma_active[0x1];
- u8 comm_est[0x1];
- u8 suspended[0x1];
- u8 reserved_31[0x5];
- u8 send_msg_psn[0x18];
-
- u8 reserved_32[0x8];
- u8 rcv_msg_psn[0x18];
+ u8 reserved_at_680[0x3];
+ u8 dbr_umem_valid[0x1];
- u8 rdma_va[0x40];
-
- u8 rdma_key[0x20];
-
- u8 reserved_33[0x20];
+ u8 reserved_at_684[0xbc];
};
struct mlx5_ifc_roce_addr_layout_bits {
@@ -2222,7 +2276,8 @@
u8 xrcd[0x18];
u8 page_offset[0x6];
- u8 reserved_2[0x2];
+ u8 reserved_at_46[0x1];
+ u8 dbr_umem_valid[0x1];
u8 cqn[0x18];
u8 reserved_3[0x20];
@@ -2643,6 +2698,9 @@
MLX5_ACCESS_MODE_PA = 0x0,
MLX5_ACCESS_MODE_MTT = 0x1,
MLX5_ACCESS_MODE_KLM = 0x2,
+ MLX5_ACCESS_MODE_KSM = 0x3,
+ MLX5_ACCESS_MODE_SW_ICM = 0x4,
+ MLX5_ACCESS_MODE_MEMIC = 0x5,
};
struct mlx5_ifc_mkc_bits {
@@ -3003,7 +3061,9 @@
struct mlx5_ifc_cqc_bits {
u8 status[0x4];
- u8 reserved_0[0x4];
+ u8 reserved_at_4[0x2];
+ u8 dbr_umem_valid[0x1];
+ u8 reserved_at_7[0x1];
u8 cqe_sz[0x3];
u8 cc[0x1];
u8 reserved_1[0x1];
@@ -3117,6 +3177,54 @@
u8 crc16[0x10];
};
+enum {
+ MLX5_XRQC_STATE_GOOD = 0x0,
+ MLX5_XRQC_STATE_ERROR = 0x1,
+};
+
+enum {
+ MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0,
+ MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1,
+};
+
+enum {
+ MLX5_XRQC_OFFLOAD_RNDV = 0x1,
+};
+
+struct mlx5_ifc_tag_matching_topology_context_bits {
+ u8 log_matching_list_sz[0x4];
+ u8 reserved_at_4[0xc];
+ u8 append_next_index[0x10];
+
+ u8 sw_phase_cnt[0x10];
+ u8 hw_phase_cnt[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_xrqc_bits {
+ u8 state[0x4];
+ u8 rlkey[0x1];
+ u8 reserved_at_5[0xf];
+ u8 topology[0x4];
+ u8 reserved_at_18[0x4];
+ u8 offload[0x4];
+
+ u8 reserved_at_20[0x8];
+ u8 user_index[0x18];
+
+ u8 reserved_at_40[0x8];
+ u8 cqn[0x18];
+
+ u8 reserved_at_60[0xa0];
+
+ struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
+
+ u8 reserved_at_180[0x280];
+
+ struct mlx5_ifc_wq_bits wq;
+};
+
struct mlx5_ifc_nodnic_port_config_reg_bits {
struct mlx5_ifc_nodnic_event_word_bits event;
@@ -3522,7 +3630,7 @@
struct mlx5_ifc_sqd2rts_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -3956,6 +4064,30 @@
u8 reserved_5[0x80];
};
+struct mlx5_ifc_query_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_xrqc_bits xrq_context;
+};
+
+struct mlx5_ifc_query_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_resume_qp_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
@@ -3995,7 +4127,7 @@
struct mlx5_ifc_query_xrc_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5070,6 +5202,93 @@
u8 reserved_2[0x40];
};
+struct mlx5_ifc_packet_reformat_context_in_bits {
+ u8 reserved_at_0[0x5];
+ u8 reformat_type[0x3];
+ u8 reserved_at_8[0xe];
+ u8 reformat_data_size[0xa];
+
+ u8 reserved_at_20[0x10];
+ u8 reformat_data[2][0x8];
+
+ u8 more_reformat_data[0][0x8];
+};
+
+struct mlx5_ifc_query_packet_reformat_context_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0xa0];
+
+ struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0];
+};
+
+struct mlx5_ifc_query_packet_reformat_context_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 packet_reformat_id[0x20];
+
+ u8 reserved_at_60[0xa0];
+};
+
+struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 packet_reformat_id[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+enum mlx5_reformat_ctx_type {
+ MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
+ MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
+ MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
+ MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
+};
+
+struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0xa0];
+
+ struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context;
+};
+
+struct mlx5_ifc_dealloc_packet_reformat_context_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_packet_reformat_context_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 packet_reformat_id[0x20];
+
+ u8 reserved_60[0x20];
+};
+
struct mlx5_ifc_query_cq_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
@@ -5396,7 +5615,7 @@
struct mlx5_ifc_modify_tis_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5430,7 +5649,7 @@
struct mlx5_ifc_modify_tir_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5458,7 +5677,7 @@
struct mlx5_ifc_modify_sq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5528,7 +5747,7 @@
struct mlx5_ifc_modify_rqt_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5561,7 +5780,7 @@
struct mlx5_ifc_modify_rq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5597,7 +5816,7 @@
struct mlx5_ifc_modify_rmp_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5807,7 +6026,7 @@
struct mlx5_ifc_modify_cq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -5819,7 +6038,12 @@
struct mlx5_ifc_cqc_bits cq_context;
- u8 reserved_3[0x600];
+ u8 reserved_at_280[0x60];
+
+ u8 cq_umem_valid[0x1];
+ u8 reserved_at_2e1[0x1f];
+
+ u8 reserved_at_300[0x580];
u8 pas[0][0x40];
};
@@ -6162,7 +6386,7 @@
struct mlx5_ifc_detach_from_mcg_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6186,7 +6410,7 @@
struct mlx5_ifc_destroy_xrc_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6208,7 +6432,7 @@
struct mlx5_ifc_destroy_tis_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6230,7 +6454,7 @@
struct mlx5_ifc_destroy_tir_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6252,7 +6476,7 @@
struct mlx5_ifc_destroy_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6324,7 +6548,7 @@
struct mlx5_ifc_destroy_rqt_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6609,7 +6833,7 @@
struct mlx5_ifc_destroy_cq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6712,7 +6936,7 @@
struct mlx5_ifc_dealloc_xrcd_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6756,7 +6980,7 @@
struct mlx5_ifc_dealloc_transport_domain_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6878,7 +7102,7 @@
struct mlx5_ifc_dealloc_pd_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6911,6 +7135,30 @@
u8 reserved_3[0x20];
};
+struct mlx5_ifc_create_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_create_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_xrqc_bits xrq_context;
+};
+
struct mlx5_ifc_deactivate_tracer_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
@@ -6946,7 +7194,7 @@
struct mlx5_ifc_create_xrc_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6955,7 +7203,12 @@
struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
- u8 reserved_3[0x600];
+ u8 reserved_at_280[0x60];
+
+ u8 xrc_srq_umem_valid[0x1];
+ u8 reserved_at_2e1[0x1f];
+
+ u8 reserved_at_300[0x580];
u8 pas[0][0x40];
};
@@ -6974,7 +7227,7 @@
struct mlx5_ifc_create_tis_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -6998,7 +7251,7 @@
struct mlx5_ifc_create_tir_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7022,7 +7275,7 @@
struct mlx5_ifc_create_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7108,7 +7361,7 @@
struct mlx5_ifc_create_rqt_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7156,7 +7409,7 @@
struct mlx5_ifc_create_rmp_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7196,7 +7449,10 @@
struct mlx5_ifc_qpc_bits qpc;
- u8 reserved_5[0x80];
+ u8 reserved_at_800[0x60];
+
+ u8 wq_umem_valid[0x1];
+ u8 reserved_at_861[0x1f];
u8 pas[0][0x40];
};
@@ -7282,7 +7538,8 @@
u8 reserved_2[0x20];
u8 pg_access[0x1];
- u8 reserved_3[0x1f];
+ u8 mkey_umem_valid[0x1];
+ u8 reserved_at_62[0x1e];
struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
@@ -7478,7 +7735,7 @@
struct mlx5_ifc_create_cq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7487,7 +7744,10 @@
struct mlx5_ifc_cqc_bits cq_context;
- u8 reserved_3[0x600];
+ u8 reserved_at_280[0x60];
+
+ u8 cq_umem_valid[0x1];
+ u8 reserved_at_2e1[0x59f];
u8 pas[0][0x40];
};
@@ -7535,7 +7795,7 @@
struct mlx5_ifc_attach_to_mcg_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7548,6 +7808,29 @@
u8 multicast_gid[16][0x8];
};
+struct mlx5_ifc_arm_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_arm_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x10];
+ u8 lwm[0x10];
+};
+
struct mlx5_ifc_arm_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
@@ -7563,7 +7846,7 @@
struct mlx5_ifc_arm_xrc_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7590,7 +7873,7 @@
struct mlx5_ifc_arm_rq_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7638,7 +7921,7 @@
struct mlx5_ifc_alloc_xrcd_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7682,7 +7965,7 @@
struct mlx5_ifc_alloc_transport_domain_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7704,7 +7987,7 @@
struct mlx5_ifc_alloc_q_counter_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7726,7 +8009,7 @@
struct mlx5_ifc_alloc_pd_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 uid[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
@@ -7736,24 +8019,24 @@
struct mlx5_ifc_alloc_flow_counter_out_bits {
u8 status[0x8];
- u8 reserved_0[0x18];
+ u8 reserved_at_8[0x18];
u8 syndrome[0x20];
- u8 reserved_1[0x10];
- u8 flow_counter_id[0x10];
+ u8 flow_counter_id[0x20];
- u8 reserved_2[0x20];
+ u8 reserved_at_60[0x20];
};
struct mlx5_ifc_alloc_flow_counter_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 reserved_at_10[0x10];
- u8 reserved_1[0x10];
+ u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 reserved_2[0x40];
+ u8 reserved_at_40[0x38];
+ u8 flow_counter_bulk[0x8];
};
struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
@@ -7810,7 +8093,7 @@
struct mlx5_ifc_set_rate_limit_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -10847,4 +11130,147 @@
u8 sensor_name_lo[0x20];
};
+struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 vhca_tunnel_id[0x10];
+ u8 obj_type[0x10];
+
+ u8 obj_id[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 obj_id[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_umem_bits {
+ u8 reserved_at_0[0x80];
+
+ u8 reserved_at_80[0x1b];
+ u8 log_page_size[0x5];
+
+ u8 page_offset[0x20];
+
+ u8 num_of_mtt[0x40];
+
+ struct mlx5_ifc_mtt_bits mtt[0];
+};
+
+struct mlx5_ifc_uctx_bits {
+ u8 cap[0x20];
+
+ u8 reserved_at_20[0x160];
+};
+
+struct mlx5_ifc_create_umem_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_umem_bits umem;
+};
+
+struct mlx5_ifc_create_uctx_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_uctx_bits uctx;
+};
+
+struct mlx5_ifc_destroy_uctx_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_mtrc_string_db_param_bits {
+ u8 string_db_base_address[0x20];
+
+ u8 reserved_at_20[0x8];
+ u8 string_db_size[0x18];
+};
+
+struct mlx5_ifc_mtrc_cap_bits {
+ u8 trace_owner[0x1];
+ u8 trace_to_memory[0x1];
+ u8 reserved_at_2[0x4];
+ u8 trc_ver[0x2];
+ u8 reserved_at_8[0x14];
+ u8 num_string_db[0x4];
+
+ u8 first_string_trace[0x8];
+ u8 num_string_trace[0x8];
+ u8 reserved_at_30[0x28];
+
+ u8 log_max_trace_buffer_size[0x8];
+
+ u8 reserved_at_60[0x20];
+
+ struct mlx5_ifc_mtrc_string_db_param_bits string_db_param[8];
+
+ u8 reserved_at_280[0x180];
+};
+
+struct mlx5_ifc_mtrc_conf_bits {
+ u8 reserved_at_0[0x1c];
+ u8 trace_mode[0x4];
+ u8 reserved_at_20[0x18];
+ u8 log_trace_buffer_size[0x8];
+ u8 trace_mkey[0x20];
+ u8 reserved_at_60[0x3a0];
+};
+
+struct mlx5_ifc_mtrc_stdb_bits {
+ u8 string_db_index[0x4];
+ u8 reserved_at_4[0x4];
+ u8 read_size[0x18];
+ u8 start_offset[0x20];
+ u8 string_db_data[0];
+};
+
+struct mlx5_ifc_mtrc_ctrl_bits {
+ u8 trace_status[0x2];
+ u8 reserved_at_2[0x2];
+ u8 arm_event[0x1];
+ u8 reserved_at_5[0xb];
+ u8 modify_field_select[0x10];
+ u8 reserved_at_20[0x2b];
+ u8 current_timestamp52_32[0x15];
+ u8 current_timestamp31_0[0x20];
+ u8 reserved_at_80[0x180];
+};
+
+struct mlx5_ifc_affiliated_event_header_bits {
+ u8 reserved_at_0[0x10];
+ u8 obj_type[0x10];
+
+ u8 obj_id[0x20];
+};
+
#endif /* MLX5_IFC_H */
diff --git a/sys/dev/mlx5/qp.h b/sys/dev/mlx5/qp.h
--- a/sys/dev/mlx5/qp.h
+++ b/sys/dev/mlx5/qp.h
@@ -563,7 +563,7 @@
return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
}
-static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
+static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
{
return radix_tree_lookup(&dev->priv.mr_table.tree, key);
}
diff --git a/sys/dev/mlx5/srq.h b/sys/dev/mlx5/srq.h
--- a/sys/dev/mlx5/srq.h
+++ b/sys/dev/mlx5/srq.h
@@ -51,6 +51,7 @@
u32 user_index;
u64 db_record;
u64 *pas;
+ u16 uid;
};
struct mlx5_core_dev;
diff --git a/sys/dev/mthca/mthca_dev.h b/sys/dev/mthca/mthca_dev.h
--- a/sys/dev/mthca/mthca_dev.h
+++ b/sys/dev/mthca/mthca_dev.h
@@ -512,7 +512,8 @@
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq);
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
@@ -549,7 +550,8 @@
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp);
+ struct mthca_qp *qp,
+ struct ib_udata *udata);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
@@ -558,7 +560,8 @@
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp);
+ struct mthca_sqp *sqp,
+ struct ib_udata *udata);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
diff --git a/sys/dev/mthca/mthca_mad.c b/sys/dev/mthca/mthca_mad.c
--- a/sys/dev/mthca/mthca_mad.c
+++ b/sys/dev/mthca/mthca_mad.c
@@ -87,13 +87,13 @@
ah_attr.port_num = port_num;
new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ ib_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -344,6 +344,7 @@
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ ib_destroy_ah(dev->sm_ah[p],
+ RDMA_DESTROY_AH_SLEEPABLE);
}
}
diff --git a/sys/dev/mthca/mthca_provider.c b/sys/dev/mthca/mthca_provider.c
--- a/sys/dev/mthca/mthca_provider.c
+++ b/sys/dev/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -298,17 +299,16 @@
return err;
}
-static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mthca_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
{
- struct mthca_alloc_ucontext_resp uresp;
- struct mthca_ucontext *context;
+ struct ib_device *ibdev = uctx->device;
+ struct mthca_alloc_ucontext_resp uresp = {};
+ struct mthca_ucontext *context = to_mucontext(uctx);
int err;
if (!(to_mdev(ibdev)->active))
- return ERR_PTR(-EAGAIN);
-
- memset(&uresp, 0, sizeof uresp);
+ return -EAGAIN;
uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
if (mthca_is_memfree(to_mdev(ibdev)))
@@ -316,44 +316,33 @@
else
uresp.uarc_size = 0;
- context = kmalloc(sizeof *context, GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
- if (err) {
- kfree(context);
- return ERR_PTR(err);
- }
+ if (err)
+ return err;
context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
if (IS_ERR(context->db_tab)) {
err = PTR_ERR(context->db_tab);
mthca_uar_free(to_mdev(ibdev), &context->uar);
- kfree(context);
- return ERR_PTR(err);
+ return err;
}
- if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
mthca_uar_free(to_mdev(ibdev), &context->uar);
- kfree(context);
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
context->reg_mr_warned = 0;
- return &context->ibucontext;
+ return 0;
}
-static int mthca_dealloc_ucontext(struct ib_ucontext *context)
+static void mthca_dealloc_ucontext(struct ib_ucontext *context)
{
mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
to_mucontext(context)->db_tab);
mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
- kfree(to_mucontext(context));
-
- return 0;
}
static int mthca_mmap_uar(struct ib_ucontext *context,
@@ -372,150 +361,115 @@
return 0;
}
-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
- struct mthca_pd *pd;
+ struct ib_device *ibdev = ibpd->device;
+ struct mthca_pd *pd = to_mpd(ibpd);
int err;
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
- err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
- if (err) {
- kfree(pd);
- return ERR_PTR(err);
- }
+ err = mthca_pd_alloc(to_mdev(ibdev), !udata, pd);
+ if (err)
+ return err;
- if (context) {
+ if (udata) {
if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
mthca_pd_free(to_mdev(ibdev), pd);
- kfree(pd);
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
}
- return &pd->ibpd;
+ return 0;
}
-static int mthca_dealloc_pd(struct ib_pd *pd)
+static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
- kfree(pd);
-
- return 0;
}
-static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
-{
- int err;
- struct mthca_ah *ah;
-
- ah = kmalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
+static int mthca_ah_create(struct ib_ah *ibah,
+ struct ib_ah_attr *init_attr, u32 flags,
+ struct ib_udata *udata)
- err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
- if (err) {
- kfree(ah);
- return ERR_PTR(err);
- }
+{
+ struct mthca_ah *ah = to_mah(ibah);
- return &ah->ibah;
+ return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd),
+ init_attr, ah);
}
-static int mthca_ah_destroy(struct ib_ah *ah)
+static void mthca_ah_destroy(struct ib_ah *ah, u32 flags)
{
mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
- kfree(ah);
-
- return 0;
}
-static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+static int mthca_create_srq(struct ib_srq *ibsrq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mthca_create_srq ucmd;
- struct mthca_ucontext *context = NULL;
- struct mthca_srq *srq;
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
+ struct mthca_srq *srq = to_msrq(ibsrq);
int err;
if (init_attr->srq_type != IB_SRQT_BASIC)
- return ERR_PTR(-ENOSYS);
-
- srq = kmalloc(sizeof *srq, GFP_KERNEL);
- if (!srq)
- return ERR_PTR(-ENOMEM);
-
- if (pd->uobject) {
- context = to_mucontext(pd->uobject->context);
+ return -EOPNOTSUPP;
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
- err = -EFAULT;
- goto err_free;
- }
+ if (udata) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
- err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ err = mthca_map_user_db(to_mdev(ibsrq->device), &context->uar,
context->db_tab, ucmd.db_index,
ucmd.db_page);
if (err)
- goto err_free;
+ return err;
srq->mr.ibmr.lkey = ucmd.lkey;
srq->db_index = ucmd.db_index;
}
- err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
- &init_attr->attr, srq);
+ err = mthca_alloc_srq(to_mdev(ibsrq->device), to_mpd(ibsrq->pd),
+ &init_attr->attr, srq, udata);
- if (err && pd->uobject)
- mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
+ if (err && udata)
+ mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar,
context->db_tab, ucmd.db_index);
if (err)
- goto err_free;
+ return err;
- if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
- mthca_free_srq(to_mdev(pd->device), srq);
- err = -EFAULT;
- goto err_free;
+ if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+ mthca_free_srq(to_mdev(ibsrq->device), srq);
+ return -EFAULT;
}
- return &srq->ibsrq;
-
-err_free:
- kfree(srq);
-
- return ERR_PTR(err);
+ return 0;
}
-static int mthca_destroy_srq(struct ib_srq *srq)
+static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
- struct mthca_ucontext *context;
-
- if (srq->uobject) {
- context = to_mucontext(srq->uobject->context);
+ if (udata) {
+ struct mthca_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mthca_ucontext,
+ ibucontext);
mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
context->db_tab, to_msrq(srq)->db_index);
}
mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
- kfree(srq);
-
- return 0;
}
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
struct mthca_create_qp ucmd;
struct mthca_qp *qp;
int err;
@@ -528,15 +482,11 @@
case IB_QPT_UC:
case IB_QPT_UD:
{
- struct mthca_ucontext *context;
-
- qp = kmalloc(sizeof *qp, GFP_KERNEL);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- if (pd->uobject) {
- context = to_mucontext(pd->uobject->context);
-
+ if (udata) {
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
kfree(qp);
return ERR_PTR(-EFAULT);
@@ -571,11 +521,9 @@
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
- &init_attr->cap, qp);
-
- if (err && pd->uobject) {
- context = to_mucontext(pd->uobject->context);
+ &init_attr->cap, qp, udata);
+ if (err && udata) {
mthca_unmap_user_db(to_mdev(pd->device),
&context->uar,
context->db_tab,
@@ -592,11 +540,7 @@
case IB_QPT_SMI:
case IB_QPT_GSI:
{
- /* Don't allow userspace to create special QPs */
- if (pd->uobject)
- return ERR_PTR(-EINVAL);
-
- qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
@@ -607,7 +551,7 @@
to_mcq(init_attr->recv_cq),
init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
- to_msqp(qp));
+ to_msqp(qp), udata);
break;
}
default:
@@ -629,64 +573,68 @@
return &qp->ibqp;
}
-static int mthca_destroy_qp(struct ib_qp *qp)
+static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
- if (qp->uobject) {
+ if (udata) {
+ struct mthca_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mthca_ucontext,
+ ibucontext);
+
mthca_unmap_user_db(to_mdev(qp->device),
- &to_mucontext(qp->uobject->context)->uar,
- to_mucontext(qp->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mqp(qp)->sq.db_index);
mthca_unmap_user_db(to_mdev(qp->device),
- &to_mucontext(qp->uobject->context)->uar,
- to_mucontext(qp->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mqp(qp)->rq.db_index);
}
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
- kfree(qp);
+ kfree(to_mqp(qp));
return 0;
}
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mthca_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
struct mthca_create_cq ucmd;
struct mthca_cq *cq;
int nent;
int err;
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
- if (context) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
- return ERR_PTR(-EFAULT);
+ if (udata) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
- err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab,
- ucmd.set_db_index, ucmd.set_db_page);
+ err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.set_db_index,
+ ucmd.set_db_page);
if (err)
- return ERR_PTR(err);
+ return err;
- err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab,
- ucmd.arm_db_index, ucmd.arm_db_page);
+ err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.arm_db_index,
+ ucmd.arm_db_page);
if (err)
goto err_unmap_set;
}
- cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq) {
- err = -ENOMEM;
- goto err_unmap_arm;
- }
+ cq = to_mcq(ibcq);
- if (context) {
+ if (udata) {
cq->buf.mr.ibmr.lkey = ucmd.lkey;
cq->set_ci_db_index = ucmd.set_db_index;
cq->arm_db_index = ucmd.arm_db_index;
@@ -695,37 +643,33 @@
for (nent = 1; nent <= entries; nent <<= 1)
; /* nothing */
- err = mthca_init_cq(to_mdev(ibdev), nent,
- context ? to_mucontext(context) : NULL,
- context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
+ err = mthca_init_cq(to_mdev(ibdev), nent, context,
+ udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
cq);
if (err)
- goto err_free;
+ goto err_unmap_arm;
- if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
+ if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) {
mthca_free_cq(to_mdev(ibdev), cq);
err = -EFAULT;
- goto err_free;
+ goto err_unmap_arm;
}
cq->resize_buf = NULL;
- return &cq->ibcq;
-
-err_free:
- kfree(cq);
+ return 0;
err_unmap_arm:
- if (context)
- mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab, ucmd.arm_db_index);
+ if (udata)
+ mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.arm_db_index);
err_unmap_set:
- if (context)
- mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab, ucmd.set_db_index);
+ if (udata)
+ mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.set_db_index);
- return ERR_PTR(err);
+ return err;
}
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
@@ -849,22 +793,25 @@
return ret;
}
-static int mthca_destroy_cq(struct ib_cq *cq)
+static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
- if (cq->uobject) {
+ if (udata) {
+ struct mthca_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mthca_ucontext,
+ ibucontext);
+
mthca_unmap_user_db(to_mdev(cq->device),
- &to_mucontext(cq->uobject->context)->uar,
- to_mucontext(cq->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mcq(cq)->arm_db_index);
mthca_unmap_user_db(to_mdev(cq->device),
- &to_mucontext(cq->uobject->context)->uar,
- to_mucontext(cq->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mcq(cq)->set_ci_db_index);
}
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
- kfree(cq);
-
- return 0;
}
static inline u32 convert_access(int acc)
@@ -999,13 +946,12 @@
return ERR_PTR(err);
}
-static int mthca_dereg_mr(struct ib_mr *mr)
+static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata)
{
struct mthca_mr *mmr = to_mmr(mr);
mthca_free_mr(to_mdev(mr->device), mmr);
- if (mmr->umem)
- ib_umem_release(mmr->umem);
+ ib_umem_release(mmr->umem);
kfree(mmr);
return 0;
@@ -1196,6 +1142,13 @@
if (ret)
return ret;
+#define mthca_ib_ah mthca_ah
+#define mthca_ib_cq mthca_cq
+#define mthca_ib_pd mthca_pd
+#define mthca_ib_qp mthca_qp
+#define mthca_ib_srq mthca_srq
+#define mthca_ib_ucontext mthca_ucontext
+ INIT_IB_DEVICE_OPS(&dev->ib_dev.ops, mthca, MTHCA);
strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
diff --git a/sys/dev/mthca/mthca_qp.c b/sys/dev/mthca/mthca_qp.c
--- a/sys/dev/mthca/mthca_qp.c
+++ b/sys/dev/mthca/mthca_qp.c
@@ -42,6 +42,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
+#include <rdma/uverbs_ioctl.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -542,10 +543,14 @@
static int __mthca_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state, enum ib_qp_state new_state)
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
struct mthca_mailbox *mailbox;
struct mthca_qp_param *qp_param;
struct mthca_qp_context *qp_context;
@@ -607,8 +612,7 @@
/* leave arbel_sched_queue as 0 */
if (qp->ibqp.uobject)
- qp_context->usr_page =
- cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+ qp_context->usr_page = cpu_to_be32(context->uar.index);
else
qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
qp_context->local_qpn = cpu_to_be32(qp->qpn);
@@ -900,7 +904,8 @@
goto out;
}
- err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state,
+ udata);
out:
mutex_unlock(&qp->mutex);
@@ -968,7 +973,8 @@
*/
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int size;
int err = -ENOMEM;
@@ -1035,7 +1041,7 @@
* allocate anything. All we need is to calculate the WQE
* sizes and the send_wqe_offset, so we're done now.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
size = PAGE_ALIGN(qp->send_wqe_offset +
@@ -1141,7 +1147,8 @@
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int ret;
int i;
@@ -1164,7 +1171,7 @@
if (ret)
return ret;
- ret = mthca_alloc_wqe_buf(dev, pd, qp);
+ ret = mthca_alloc_wqe_buf(dev, pd, qp, udata);
if (ret) {
mthca_unmap_memfree(dev, qp);
return ret;
@@ -1177,7 +1184,7 @@
* will be allocated and buffers will be initialized in
* userspace.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
ret = mthca_alloc_memfree(dev, qp);
@@ -1270,7 +1277,8 @@
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int err;
@@ -1293,7 +1301,7 @@
qp->port = 0;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, qp);
+ send_policy, qp, udata);
if (err) {
mthca_free(&dev->qp_table.alloc, qp->qpn);
return err;
@@ -1345,7 +1353,8 @@
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp)
+ struct mthca_sqp *sqp,
+ struct ib_udata *udata)
{
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
int err;
@@ -1376,7 +1385,7 @@
sqp->qp.transport = MLX;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, &sqp->qp);
+ send_policy, &sqp->qp, udata);
if (err)
goto err_out_free;
diff --git a/sys/dev/mthca/mthca_srq.c b/sys/dev/mthca/mthca_srq.c
--- a/sys/dev/mthca/mthca_srq.c
+++ b/sys/dev/mthca/mthca_srq.c
@@ -36,6 +36,8 @@
#include <asm/io.h>
+#include <rdma/uverbs_ioctl.h>
+
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
@@ -95,17 +97,20 @@
static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_tavor_srq_context *context)
+ struct mthca_tavor_srq_context *context,
+ struct ib_udata *udata)
{
+ struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
+
memset(context, 0, sizeof *context);
context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
context->state_pd = cpu_to_be32(pd->pd_num);
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
- if (pd->ibpd.uobject)
- context->uar =
- cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ if (udata)
+ context->uar = cpu_to_be32(ucontext->uar.index);
else
context->uar = cpu_to_be32(dev->driver_uar.index);
}
@@ -113,8 +118,11 @@
static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_arbel_srq_context *context)
+ struct mthca_arbel_srq_context *context,
+ struct ib_udata *udata)
{
+ struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
int logsize;
memset(context, 0, sizeof *context);
@@ -123,9 +131,8 @@
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
context->db_index = cpu_to_be32(srq->db_index);
context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
- if (pd->ibpd.uobject)
- context->logstride_usrpage |=
- cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ if (udata)
+ context->logstride_usrpage |= cpu_to_be32(ucontext->uar.index);
else
context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index);
context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
@@ -139,14 +146,14 @@
}
static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
- struct mthca_srq *srq)
+ struct mthca_srq *srq, struct ib_udata *udata)
{
struct mthca_data_seg *scatter;
void *wqe;
int err;
int i;
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL);
@@ -191,7 +198,8 @@
}
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq)
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata)
{
struct mthca_mailbox *mailbox;
int ds;
@@ -229,7 +237,7 @@
if (err)
goto err_out;
- if (!pd->ibpd.uobject) {
+ if (!udata) {
srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
srq->srqn, &srq->db);
if (srq->db_index < 0) {
@@ -245,7 +253,7 @@
goto err_out_db;
}
- err = mthca_alloc_srq_buf(dev, pd, srq);
+ err = mthca_alloc_srq_buf(dev, pd, srq, udata);
if (err)
goto err_out_mailbox;
@@ -255,9 +263,9 @@
mutex_init(&srq->mutex);
if (mthca_is_memfree(dev))
- mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata);
else
- mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata);
err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
@@ -291,14 +299,14 @@
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
err_out_free_buf:
- if (!pd->ibpd.uobject)
+ if (!udata)
mthca_free_srq_buf(dev, srq);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_db:
- if (!pd->ibpd.uobject && mthca_is_memfree(dev))
+ if (!udata && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
err_out_icm:
diff --git a/sys/dev/qlnx/qlnxr/qlnxr_def.h b/sys/dev/qlnx/qlnxr/qlnxr_def.h
--- a/sys/dev/qlnx/qlnxr/qlnxr_def.h
+++ b/sys/dev/qlnx/qlnxr/qlnxr_def.h
@@ -65,6 +65,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_sa.h>
+#include <rdma/uverbs_ioctl.h>
#if __FreeBSD_version < 1100000
#undef MODULE_VERSION
diff --git a/sys/dev/qlnx/qlnxr/qlnxr_os.c b/sys/dev/qlnx/qlnxr/qlnxr_os.c
--- a/sys/dev/qlnx/qlnxr/qlnxr_os.c
+++ b/sys/dev/qlnx/qlnxr/qlnxr_os.c
@@ -156,6 +156,13 @@
ibdev = &dev->ibdev;
+#define qlnxr_ib_ah qlnxr_ah
+#define qlnxr_ib_cq qlnxr_cq
+#define qlnxr_ib_pd qlnxr_pd
+#define qlnxr_ib_qp qlnxr_qp
+#define qlnxr_ib_srq qlnxr_srq
+#define qlnxr_ib_ucontext qlnxr_ucontext
+ INIT_IB_DEVICE_OPS(&ibdev->ops, qlnxr, QLNXR);
strlcpy(ibdev->name, "qlnxr%d", IB_DEVICE_NAME_MAX);
memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid));
diff --git a/sys/dev/qlnx/qlnxr/qlnxr_verbs.h b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h
--- a/sys/dev/qlnx/qlnxr/qlnxr_verbs.h
+++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h
@@ -40,11 +40,12 @@
int index,
union ib_gid *gid);
-extern struct ib_srq *qlnxr_create_srq(struct ib_pd *,
+extern int qlnxr_create_srq(struct ib_srq *ibsrq,
struct ib_srq_init_attr *,
struct ib_udata *);
-extern int qlnxr_destroy_srq(struct ib_srq *);
+extern void qlnxr_destroy_srq(struct ib_srq *,
+ struct ib_udata *);
extern int qlnxr_modify_srq(struct ib_srq *,
struct ib_srq_attr *,
@@ -79,33 +80,15 @@
extern enum rdma_link_layer qlnxr_link_layer(struct ib_device *device,
uint8_t port_num);
-struct ib_pd *qlnxr_alloc_pd(struct ib_device *,
- struct ib_ucontext *,
- struct ib_udata *);
+extern int qlnxr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *);
-extern int qlnxr_dealloc_pd(struct ib_pd *pd);
+extern void qlnxr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-#if __FreeBSD_version >= 1102000
-extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata);
-#else
-#if __FreeBSD_version >= 1100000
-extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev,
- struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata);
-#else
-extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev,
- int cqe,
- int comp_vector,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata);
-#endif
-#endif /* #if __FreeBSD_version >= 1102000 */
+extern int qlnxr_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
-extern int qlnxr_destroy_cq(struct ib_cq *);
+extern void qlnxr_destroy_cq(struct ib_cq *, struct ib_udata *);
extern int qlnxr_resize_cq(struct ib_cq *,
int cqe,
@@ -129,22 +112,17 @@
int qp_attr_mask,
struct ib_qp_init_attr *);
-extern int qlnxr_destroy_qp(struct ib_qp *);
+extern int qlnxr_destroy_qp(struct ib_qp *, struct ib_udata *);
extern int qlnxr_query_pkey(struct ib_device *,
u8 port,
u16 index,
u16 *pkey);
-#if __FreeBSD_version >= 1102000
-extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd,
- struct ib_ah_attr *attr, struct ib_udata *udata);
-#else
-extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd,
- struct ib_ah_attr *attr);
-#endif /* #if __FreeBSD_version >= 1102000 */
-
-extern int qlnxr_destroy_ah(struct ib_ah *ibah);
+extern int qlnxr_create_ah(struct ib_ah *ibah,
+ struct ib_ah_attr *attr, u32 flags,
+ struct ib_udata *udata);
+extern void qlnxr_destroy_ah(struct ib_ah *ibah, u32 flags);
extern int qlnxr_query_ah(struct ib_ah *ibah,
struct ib_ah_attr *attr);
@@ -195,7 +173,7 @@
u64 *iova_start);
#endif /* #if __FreeBSD_version < 1102000 */
-extern int qlnxr_dereg_mr(struct ib_mr *);
+extern int qlnxr_dereg_mr(struct ib_mr *, struct ib_udata *);
#if __FreeBSD_version >= 1102000
extern struct ib_mr *qlnxr_reg_user_mr(struct ib_pd *,
@@ -217,7 +195,9 @@
#if __FreeBSD_version >= 1102000
extern struct ib_mr *qlnxr_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type, u32 max_num_sg);
+ enum ib_mr_type mr_type, u32 max_num_sg,
+ struct ib_udata *udata);
+
extern int qlnxr_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
#else
@@ -233,10 +213,10 @@
#endif /* #if __FreeBSD_version >= 1102000 */
-extern struct ib_ucontext *qlnxr_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata);
+extern int qlnxr_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata);
-extern int qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx);
+extern void qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx);
extern int qlnxr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
diff --git a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c
--- a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c
+++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c
@@ -71,8 +71,7 @@
((unsigned char *)&addr)[3]
static int
-qlnxr_check_srq_params(struct ib_pd *ibpd,
- struct qlnxr_dev *dev,
+qlnxr_check_srq_params(struct qlnxr_dev *dev,
struct ib_srq_init_attr *attrs);
static int
@@ -159,9 +158,10 @@
return 0;
}
-struct ib_srq *
-qlnxr_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+int
+qlnxr_create_srq(struct ib_srq *ibsrq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct qlnxr_dev *dev;
qlnx_host_t *ha;
@@ -169,36 +169,28 @@
struct ecore_rdma_create_srq_out_params out_params;
struct ecore_rdma_create_srq_in_params in_params;
u64 pbl_base_addr, phy_prod_pair_addr;
- struct qlnxr_pd *pd = get_qlnxr_pd(ibpd);
- struct ib_ucontext *ib_ctx = NULL;
struct qlnxr_srq_hwq_info *hw_srq;
- struct qlnxr_ucontext *ctx = NULL;
+ struct qlnxr_ucontext *ctx;
struct qlnxr_create_srq_ureq ureq;
u32 page_cnt, page_size;
- struct qlnxr_srq *srq;
+ struct qlnxr_srq *srq = get_qlnxr_srq(ibsrq);
int ret = 0;
- dev = get_qlnxr_dev((ibpd->device));
+ dev = get_qlnxr_dev(ibsrq->device);
ha = dev->ha;
QL_DPRINT12(ha, "enter\n");
- ret = qlnxr_check_srq_params(ibpd, dev, init_attr);
-
- srq = kzalloc(sizeof(*srq), GFP_KERNEL);
- if (!srq) {
- QL_DPRINT11(ha, "cannot allocate memory for srq\n");
- return NULL; //@@@ : TODO what to return here?
- }
+ ret = qlnxr_check_srq_params(dev, init_attr);
srq->dev = dev;
hw_srq = &srq->hw_srq;
spin_lock_init(&srq->lock);
memset(&in_params, 0, sizeof(in_params));
- if (udata && ibpd->uobject && ibpd->uobject->context) {
- ib_ctx = ibpd->uobject->context;
- ctx = get_qlnxr_ucontext(ib_ctx);
+ if (udata) {
+ ctx = rdma_udata_to_drv_context(
+ udata, struct qlnxr_ucontext, ibucontext);
memset(&ureq, 0, sizeof(ureq));
if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
@@ -208,7 +200,7 @@
goto err0;
}
- ret = qlnxr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0);
+ ret = qlnxr_init_srq_user_params(&ctx->ibucontext, srq, &ureq, 0, 0);
if (ret)
goto err0;
@@ -232,7 +224,7 @@
page_size = pbl->elem_per_page << 4;
}
- in_params.pd_id = pd->pd_id;
+ in_params.pd_id = get_qlnxr_pd(ibsrq->pd)->pd_id;
in_params.pbl_base_addr = pbl_base_addr;
in_params.prod_pair_addr = phy_prod_pair_addr;
in_params.num_pages = page_cnt;
@@ -251,7 +243,7 @@
}
QL_DPRINT12(ha, "created srq with srq_id = 0x%0x\n", srq->srq_id);
- return &srq->ibsrq;
+ return (0);
err2:
memset(&in_params, 0, sizeof(in_params));
destroy_in_params.srq_id = srq->srq_id;
@@ -264,12 +256,11 @@
qlnxr_free_srq_kernel_params(srq);
err0:
- kfree(srq);
- return ERR_PTR(-EFAULT);
+ return (-EFAULT);
}
-int
-qlnxr_destroy_srq(struct ib_srq *ibsrq)
+void
+qlnxr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct qlnxr_dev *dev;
struct qlnxr_srq *srq;
@@ -291,8 +282,6 @@
qlnxr_free_srq_kernel_params(srq);
QL_DPRINT12(ha, "destroyed srq_id=0x%0x\n", srq->srq_id);
- kfree(srq);
- return 0;
}
int
@@ -718,11 +707,11 @@
return IB_LINK_LAYER_ETHERNET;
}
-struct ib_pd *
-qlnxr_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context,
- struct ib_udata *udata)
+int
+qlnxr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
- struct qlnxr_pd *pd = NULL;
+ struct ib_device *ibdev = ibpd->device;
+ struct qlnxr_pd *pd = get_qlnxr_pd(ibpd);
u16 pd_id;
int rc;
struct qlnxr_dev *dev;
@@ -731,8 +720,7 @@
dev = get_qlnxr_dev(ibdev);
ha = dev->ha;
- QL_DPRINT12(ha, "ibdev = %p context = %p"
- " udata = %p enter\n", ibdev, context, udata);
+ QL_DPRINT12(ha, "ibdev = %p udata = %p enter\n", ibdev, udata);
if (dev->rdma_ctx == NULL) {
QL_DPRINT11(ha, "dev->rdma_ctx = NULL\n");
@@ -740,13 +728,6 @@
goto err;
}
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd) {
- rc = -ENOMEM;
- QL_DPRINT11(ha, "kzalloc(pd) = NULL\n");
- goto err;
- }
-
rc = ecore_rdma_alloc_pd(dev->rdma_ctx, &pd_id);
if (rc) {
QL_DPRINT11(ha, "ecore_rdma_alloc_pd failed\n");
@@ -755,7 +736,7 @@
pd->pd_id = pd_id;
- if (udata && context) {
+ if (udata) {
rc = ib_copy_to_udata(udata, &pd->pd_id, sizeof(pd->pd_id));
if (rc) {
QL_DPRINT11(ha, "ib_copy_to_udata failed\n");
@@ -763,7 +744,8 @@
goto err;
}
- pd->uctx = get_qlnxr_ucontext(context);
+ pd->uctx = rdma_udata_to_drv_context(
+ udata, struct qlnxr_ucontext, ibucontext);
pd->uctx->pd = pd;
}
@@ -771,16 +753,15 @@
QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n",
pd, pd_id, dev->pd_count);
- return &pd->ibpd;
+ return (0);
err:
- kfree(pd);
QL_DPRINT12(ha, "exit -1\n");
- return ERR_PTR(rc);
+ return (rc);
}
-int
-qlnxr_dealloc_pd(struct ib_pd *ibpd)
+void
+qlnxr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct qlnxr_pd *pd;
struct qlnxr_dev *dev;
@@ -796,14 +777,12 @@
QL_DPRINT11(ha, "pd = NULL\n");
} else {
ecore_rdma_free_pd(dev->rdma_ctx, pd->pd_id);
- kfree(pd);
atomic_subtract_rel_32(&dev->pd_count, 1);
QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n",
pd, pd->pd_id, dev->pd_count);
}
QL_DPRINT12(ha, "exit\n");
- return 0;
}
#define ROCE_WQE_ELEM_SIZE sizeof(struct rdma_sq_sge)
@@ -977,24 +956,18 @@
return found;
}
-struct
-ib_ucontext *qlnxr_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+int
+qlnxr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
int rc;
- struct qlnxr_ucontext *ctx;
+ struct qlnxr_ucontext *ctx = get_qlnxr_ucontext(uctx);
struct qlnxr_alloc_ucontext_resp uresp;
- struct qlnxr_dev *dev = get_qlnxr_dev(ibdev);
+ struct qlnxr_dev *dev = get_qlnxr_dev(uctx->device);
qlnx_host_t *ha = dev->ha;
struct ecore_rdma_add_user_out_params oparams;
- if (!udata) {
- return ERR_PTR(-EFAULT);
- }
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
+ if (!udata)
+ return -EFAULT;
rc = ecore_rdma_add_user(dev->rdma_ctx, &oparams);
if (rc) {
@@ -1044,20 +1017,18 @@
QL_DPRINT12(ha, "Allocated user context %p\n",
&ctx->ibucontext);
- return &ctx->ibucontext;
+ return (0);
err:
- kfree(ctx);
- return ERR_PTR(rc);
+ return (rc);
}
-int
+void
qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
struct qlnxr_ucontext *uctx = get_qlnxr_ucontext(ibctx);
struct qlnxr_dev *dev = uctx->dev;
qlnx_host_t *ha = dev->ha;
struct qlnxr_mm *mm, *tmp;
- int status = 0;
QL_DPRINT12(ha, "Deallocating user context %p\n",
uctx);
@@ -1073,8 +1044,6 @@
list_del(&mm->entry);
kfree(mm);
}
- kfree(uctx);
- return status;
}
int
@@ -1662,7 +1631,7 @@
}
int
-qlnxr_dereg_mr(struct ib_mr *ib_mr)
+qlnxr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
struct qlnxr_mr *mr = get_qlnxr_mr(ib_mr);
struct qlnxr_dev *dev = get_qlnxr_dev((ib_mr->device));
@@ -1822,35 +1791,10 @@
return rc;
}
-#if __FreeBSD_version >= 1102000
-
-struct ib_cq *
-qlnxr_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata)
-
-#else
-
-#if __FreeBSD_version >= 1100000
-
-struct ib_cq *
-qlnxr_create_cq(struct ib_device *ibdev,
- struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata)
-
-#else
-
-struct ib_cq *
-qlnxr_create_cq(struct ib_device *ibdev,
- int entries,
- int vector,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata)
-#endif /* #if __FreeBSD_version >= 1100000 */
-
-#endif /* #if __FreeBSD_version >= 1102000 */
+int
+qlnxr_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
struct qlnxr_ucontext *ctx;
struct ecore_rdma_destroy_cq_out_params destroy_oparams;
@@ -1863,13 +1807,13 @@
int vector = attr->comp_vector;
int entries = attr->cqe;
#endif
- struct qlnxr_cq *cq;
+ struct qlnxr_cq *cq = get_qlnxr_cq(ibcq);
int chain_entries, rc, page_cnt;
u64 pbl_ptr;
u16 icid;
qlnx_host_t *ha;
- dev = get_qlnxr_dev(ibdev);
+ dev = get_qlnxr_dev(ibcq->device);
ha = dev->ha;
QL_DPRINT12(ha, "called from %s. entries = %d, "
@@ -1885,17 +1829,15 @@
"the number of entries %d is too high. "
"Must be equal or below %d.\n",
entries, QLNXR_MAX_CQES);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
chain_entries = qlnxr_align_cq_entries(entries);
chain_entries = min_t(int, chain_entries, QLNXR_MAX_CQES);
- cq = qlnx_zalloc((sizeof(struct qlnxr_cq)));
-
- if (!cq)
- return ERR_PTR(-ENOMEM);
-
if (udata) {
+ ctx = rdma_udata_to_drv_context(
+ udata, struct qlnxr_ucontext, ibucontext);
+
memset(&ureq, 0, sizeof(ureq));
if (ib_copy_from_udata(&ureq, udata,
@@ -1911,13 +1853,15 @@
cq->cq_type = QLNXR_CQ_TYPE_USER;
- qlnxr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, ureq.len,
+ qlnxr_init_user_queue(&ctx->ibucontext, dev, &cq->q, ureq.addr, ureq.len,
IB_ACCESS_LOCAL_WRITE, 1, 1);
pbl_ptr = cq->q.pbl_tbl->pa;
page_cnt = cq->q.pbl_info.num_pbes;
cq->ibcq.cqe = chain_entries;
} else {
+ ctx = NULL;
+
cq->cq_type = QLNXR_CQ_TYPE_KERNEL;
rc = ecore_chain_alloc(&dev->ha->cdev,
@@ -1944,8 +1888,7 @@
params.pbl_ptr = pbl_ptr;
params.pbl_two_level = 0;
- if (ib_ctx != NULL) {
- ctx = get_qlnxr_ucontext(ib_ctx);
+ if (udata) {
params.dpi = ctx->dpi;
} else {
params.dpi = dev->dpi;
@@ -1959,7 +1902,7 @@
cq->sig = QLNXR_CQ_MAGIC_NUMBER;
spin_lock_init(&cq->cq_lock);
- if (ib_ctx) {
+ if (udata) {
rc = qlnxr_copy_cq_uresp(dev, cq, udata);
if (rc)
goto err3;
@@ -1990,7 +1933,7 @@
" number of entries = 0x%x\n",
cq->icid, cq, params.cq_size);
QL_DPRINT12(ha,"cq_addr = %p\n", cq);
- return &cq->ibcq;
+ return (0);
err3:
destroy_iparams.icid = cq->icid;
@@ -2004,11 +1947,9 @@
if (udata)
ib_umem_release(cq->q.umem);
err0:
- kfree(cq);
-
QL_DPRINT12(ha, "exit error\n");
- return ERR_PTR(-EINVAL);
+ return (-EINVAL);
}
int qlnxr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
@@ -2024,8 +1965,8 @@
return status;
}
-int
-qlnxr_destroy_cq(struct ib_cq *ibcq)
+void
+qlnxr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device));
struct ecore_rdma_destroy_cq_out_params oparams;
@@ -2055,25 +1996,21 @@
if (rc) {
QL_DPRINT12(ha, "ecore_rdma_destroy_cq failed cq_id = %d\n",
cq->icid);
- return rc;
+ return;
}
QL_DPRINT12(ha, "free cq->pbl cq_id = %d\n", cq->icid);
ecore_chain_free(&dev->ha->cdev, &cq->pbl);
}
- if (ibcq->uobject && ibcq->uobject->context) {
+ if (udata) {
qlnxr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
}
cq->sig = ~cq->sig;
- kfree(cq);
-
QL_DPRINT12(ha, "exit cq_id = %d\n", cq->icid);
-
- return rc;
}
static int
@@ -2393,8 +2330,7 @@
}
static int
-qlnxr_check_srq_params(struct ib_pd *ibpd,
- struct qlnxr_dev *dev,
+qlnxr_check_srq_params(struct qlnxr_dev *dev,
struct ib_srq_init_attr *attrs)
{
struct ecore_rdma_device *qattr;
@@ -3281,8 +3217,6 @@
return &qp->ibqp;
err:
- kfree(qp);
-
QL_DPRINT12(ha, "failed exit\n");
return ERR_PTR(-EFAULT);
}
@@ -3987,9 +3921,9 @@
return;
}
-int
+static int
qlnxr_free_qp_resources(struct qlnxr_dev *dev,
- struct qlnxr_qp *qp)
+ struct qlnxr_qp *qp, struct ib_udata *udata)
{
int rc = 0;
qlnx_host_t *ha;
@@ -4007,13 +3941,13 @@
return rc;
}
- if (qp->ibqp.uobject && qp->ibqp.uobject->context)
+ if (udata)
qlnxr_cleanup_user(dev, qp);
else
qlnxr_cleanup_kernel(dev, qp);
#endif
- if (qp->ibqp.uobject && qp->ibqp.uobject->context)
+ if (udata)
qlnxr_cleanup_user(dev, qp);
else
qlnxr_cleanup_kernel(dev, qp);
@@ -4030,7 +3964,7 @@
}
int
-qlnxr_destroy_qp(struct ib_qp *ibqp)
+qlnxr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct qlnxr_qp *qp = get_qlnxr_qp(ibqp);
struct qlnxr_dev *dev = qp->dev;
@@ -4060,12 +3994,11 @@
qp->sig = ~qp->sig;
- qlnxr_free_qp_resources(dev, qp);
+ qlnxr_free_qp_resources(dev, qp, udata);
if (atomic_dec_and_test(&qp->refcnt)) {
/* TODO: only for iWARP? */
qlnxr_idr_remove(dev, qp->qp_id);
- kfree(qp);
}
QL_DPRINT12(ha, "exit\n");
@@ -5837,7 +5770,8 @@
#if __FreeBSD_version >= 1102000
struct ib_mr *
-qlnxr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg)
+qlnxr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata)
{
struct qlnxr_dev *dev;
struct qlnxr_mr *mr;
@@ -6266,48 +6200,28 @@
#endif /* #if __FreeBSD_version >= 1102000 */
-struct ib_ah *
-#if __FreeBSD_version >= 1102000
-qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+int
+qlnxr_create_ah(struct ib_ah *ibah,
+ struct ib_ah_attr *attr, u32 flags,
struct ib_udata *udata)
-#else
-qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
-#endif /* #if __FreeBSD_version >= 1102000 */
{
struct qlnxr_dev *dev;
qlnx_host_t *ha;
- struct qlnxr_ah *ah;
+ struct qlnxr_ah *ah = get_qlnxr_ah(ibah);
- dev = get_qlnxr_dev((ibpd->device));
+ dev = get_qlnxr_dev(ibah->device);
ha = dev->ha;
QL_DPRINT12(ha, "in create_ah\n");
- ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah) {
- QL_DPRINT12(ha, "no address handle can be allocated\n");
- return ERR_PTR(-ENOMEM);
- }
-
ah->attr = *attr;
- return &ah->ibah;
+ return (0);
}
-int
-qlnxr_destroy_ah(struct ib_ah *ibah)
+void
+qlnxr_destroy_ah(struct ib_ah *ibah, u32 flags)
{
- struct qlnxr_dev *dev;
- qlnx_host_t *ha;
- struct qlnxr_ah *ah = get_qlnxr_ah(ibah);
-
- dev = get_qlnxr_dev((ibah->device));
- ha = dev->ha;
-
- QL_DPRINT12(ha, "in destroy_ah\n");
-
- kfree(ah);
- return 0;
}
int
@@ -7187,7 +7101,6 @@
if (atomic_dec_and_test(&qp->refcnt)) {
qlnxr_idr_remove(qp->dev, qp->qp_id);
- kfree(qp);
}
QL_DPRINT12(ha, "exit \n");
diff --git a/sys/modules/ibcore/Makefile b/sys/modules/ibcore/Makefile
--- a/sys/modules/ibcore/Makefile
+++ b/sys/modules/ibcore/Makefile
@@ -8,6 +8,7 @@
ib_cache.c \
ib_cm.c \
ib_cma.c \
+ ib_core_uverbs.c \
ib_cq.c \
ib_device.c \
ib_fmr_pool.c \
@@ -18,6 +19,7 @@
ib_mad_rmpp.c \
ib_multicast.c \
ib_packer.c \
+ ib_rdma_core.c \
ib_roce_gid_mgmt.c \
ib_sa_query.c \
ib_smi.c \
@@ -28,9 +30,20 @@
ib_umem.c \
ib_user_mad.c \
ib_uverbs_cmd.c \
+ ib_uverbs_ioctl.c \
ib_uverbs_main.c \
ib_uverbs_marshall.c \
+ ib_uverbs_std_types.c \
+ ib_uverbs_std_types_async_fd.c \
+ ib_uverbs_std_types_counters.c \
+ ib_uverbs_std_types_cq.c \
+ ib_uverbs_std_types_device.c \
+ ib_uverbs_std_types_dm.c \
+ ib_uverbs_std_types_flow_action.c \
+ ib_uverbs_std_types_mr.c \
+ ib_uverbs_uapi.c \
ib_verbs.c
+
SRCS+= ${LINUXKPI_GENSRCS}
SRCS+= opt_inet.h opt_inet6.h
diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile
--- a/sys/modules/mlx5/Makefile
+++ b/sys/modules/mlx5/Makefile
@@ -38,6 +38,7 @@
SRCS+= opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h
CFLAGS+= -I${SRCTOP}/sys/ofed/include
+CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include
.if defined(CONFIG_BUILD_FPGA)
diff --git a/sys/modules/mlx5en/Makefile b/sys/modules/mlx5en/Makefile
--- a/sys/modules/mlx5en/Makefile
+++ b/sys/modules/mlx5en/Makefile
@@ -29,6 +29,7 @@
.endif
CFLAGS+= -I${SRCTOP}/sys/ofed/include
+CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include
.include <bsd.kmod.mk>
diff --git a/sys/modules/mlx5fpga_tools/Makefile b/sys/modules/mlx5fpga_tools/Makefile
--- a/sys/modules/mlx5fpga_tools/Makefile
+++ b/sys/modules/mlx5fpga_tools/Makefile
@@ -10,6 +10,7 @@
SRCS+= opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h
CFLAGS+= -I${SRCTOP}/sys/ofed/include
+CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include
.include <bsd.kmod.mk>
diff --git a/sys/modules/mlx5ib/Makefile b/sys/modules/mlx5ib/Makefile
--- a/sys/modules/mlx5ib/Makefile
+++ b/sys/modules/mlx5ib/Makefile
@@ -6,6 +6,7 @@
mlx5_ib_ah.c \
mlx5_ib_cong.c \
mlx5_ib_cq.c \
+mlx5_ib_devx.c \
mlx5_ib_doorbell.c \
mlx5_ib_gsi.c \
mlx5_ib_mad.c \
diff --git a/sys/modules/mlxfw/Makefile b/sys/modules/mlxfw/Makefile
--- a/sys/modules/mlxfw/Makefile
+++ b/sys/modules/mlxfw/Makefile
@@ -10,6 +10,7 @@
CFLAGS+= \
-I${SRCTOP}/sys/ofed/include \
+ -I${SRCTOP}/sys/ofed/include/uapi \
-I${SRCTOP}/sys/compat/linuxkpi/common/include \
-I${SRCTOP}/sys/contrib/xz-embedded/freebsd \
-I${SRCTOP}/sys/contrib/xz-embedded/linux/lib/xz
diff --git a/sys/ofed/drivers/infiniband/core/core_priv.h b/sys/ofed/drivers/infiniband/core/core_priv.h
--- a/sys/ofed/drivers/infiniband/core/core_priv.h
+++ b/sys/ofed/drivers/infiniband/core/core_priv.h
@@ -44,6 +44,9 @@
#include <net/if_vlan_var.h>
+/* Total number of ports combined across all struct ib_devices's */
+#define RDMA_MAX_PORTS 8192
+
#ifdef CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS
int cma_configfs_init(void);
void cma_configfs_exit(void);
@@ -128,6 +131,8 @@
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+#define ib_rdmacg_try_charge(...) ({ 0; })
+
int addr_init(void);
void addr_cleanup(void);
@@ -142,4 +147,48 @@
const char *name);
void ib_port_unregister_module_stat(struct kobject *kobj);
+static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj)
+{
+ struct ib_qp *qp;
+
+ if (!dev->create_qp)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ qp = dev->create_qp(pd, attr, udata);
+ if (IS_ERR(qp))
+ return qp;
+
+ qp->device = dev;
+ qp->pd = pd;
+ qp->uobject = uobj;
+ qp->real_qp = qp;
+
+ qp->qp_type = attr->qp_type;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+ qp->srq = attr->srq;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->event_handler = attr->event_handler;
+
+ atomic_set(&qp->usecnt, 0);
+ spin_lock_init(&qp->mr_lock);
+
+ return qp;
+}
+
+struct rdma_umap_priv {
+ struct vm_area_struct *vma;
+ struct list_head list;
+ struct rdma_user_mmap_entry *entry;
+};
+
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry);
+
#endif /* _CORE_PRIV_H */
diff --git a/sys/ofed/drivers/infiniband/core/ib_agent.c b/sys/ofed/drivers/infiniband/core/ib_agent.c
--- a/sys/ofed/drivers/infiniband/core/ib_agent.c
+++ b/sys/ofed/drivers/infiniband/core/ib_agent.c
@@ -141,13 +141,13 @@
err2:
ib_free_send_mad(send_buf);
err1:
- ib_destroy_ah(ah);
+ ib_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
diff --git a/sys/ofed/drivers/infiniband/core/ib_cm.c b/sys/ofed/drivers/infiniband/core/ib_cm.c
--- a/sys/ofed/drivers/infiniband/core/ib_cm.c
+++ b/sys/ofed/drivers/infiniband/core/ib_cm.c
@@ -351,7 +351,7 @@
ret = -ENODEV;
goto out;
}
- ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
@@ -363,7 +363,7 @@
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ ib_destroy_ah(ah, 0);
ret = PTR_ERR(m);
goto out;
}
@@ -408,7 +408,7 @@
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
if (msg->ah)
- ib_destroy_ah(msg->ah);
+ ib_destroy_ah(msg->ah, 0);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
diff --git a/sys/ofed/drivers/infiniband/core/ib_core_uverbs.c b/sys/ofed/drivers/infiniband/core/ib_core_uverbs.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_core_uverbs.c
@@ -0,0 +1,390 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
+ *
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2019 Marvell. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <linux/xarray.h>
+#include "uverbs.h"
+#include "core_priv.h"
+
+/**
+ * rdma_umap_priv_init() - Initialize the private data of a vma
+ *
+ * @priv: The already allocated private data
+ * @vma: The vm area struct that needs private data
+ * @entry: entry into the mmap_xa that needs to be linked with
+ * this vma
+ *
+ * Each time we map IO memory into user space this keeps track of the
+ * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
+ * to point to the zero page and allow the hot unplug to proceed.
+ *
+ * This is necessary for cases like PCI physical hot unplug as the actual BAR
+ * memory may vanish after this and access to it from userspace could MCE.
+ *
+ * RDMA drivers supporting disassociation must have their user space designed
+ * to cope in some way with their IO pages going to the zero page.
+ *
+ */
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+ priv->vma = vma;
+ if (entry) {
+ kref_get(&entry->ref);
+ priv->entry = entry;
+ }
+ vma->vm_private_data = priv;
+ /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
+
+ mutex_lock(&ufile->umap_lock);
+ list_add(&priv->list, &ufile->umaps);
+ mutex_unlock(&ufile->umap_lock);
+}
+EXPORT_SYMBOL(rdma_umap_priv_init);
+
+/**
+ * rdma_user_mmap_io() - Map IO memory into a process
+ *
+ * @ucontext: associated user context
+ * @vma: the vma related to the current mmap call
+ * @pfn: pfn to map
+ * @size: size to map
+ * @prot: pgprot to use in remap call
+ * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
+ * if mmap_entry is not used by the driver
+ *
+ * This is to be called by drivers as part of their mmap() functions if they
+ * wish to send something like PCI-E BAR memory to userspace.
+ *
+ * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
+ * success.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ struct rdma_umap_priv *priv;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ if (vma->vm_end - vma->vm_start != size)
+ return -EINVAL;
+
+ /* Driver is using this wrong, must be called by ib_uverbs_mmap */
+ if (WARN_ON(!vma->vm_file ||
+ vma->vm_file->private_data != ufile))
+ return -EINVAL;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ vma->vm_page_prot = prot;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+ kfree(priv);
+ return -EAGAIN;
+ }
+
+ rdma_umap_priv_init(priv, vma, entry);
+ return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/**
+ * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @pgoff: The mmap offset >> PAGE_SHIFT
+ *
+ * This function is called when a user tries to mmap with an offset (returned
+ * by rdma_user_mmap_get_offset()) it initially received from the driver. The
+ * rdma_user_mmap_entry was created by the function
+ * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
+ * entry so that it won't be deleted from the xarray in the meantime.
+ *
+ * Return an reference to an entry if exists or NULL if there is no
+ * match. rdma_user_mmap_entry_put() must be called to put the reference.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+ unsigned long pgoff)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (pgoff > U32_MAX)
+ return NULL;
+
+ xa_lock(&ucontext->mmap_xa);
+
+ entry = xa_load(&ucontext->mmap_xa, pgoff);
+
+ /*
+ * If refcount is zero, entry is already being deleted, driver_removed
+ * indicates that the no further mmaps are possible and we waiting for
+ * the active VMAs to be closed.
+ */
+ if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
+ !kref_get_unless_zero(&entry->ref))
+ goto err;
+
+ xa_unlock(&ucontext->mmap_xa);
+
+ return entry;
+
+err:
+ xa_unlock(&ucontext->mmap_xa);
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @vma: the vma being mmap'd into
+ *
+ * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
+ * checks that the VMA is correct.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return NULL;
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
+ if (!entry)
+ return NULL;
+ if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
+ rdma_user_mmap_entry_put(entry);
+ return NULL;
+ }
+ return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+static void rdma_user_mmap_entry_free(struct kref *kref)
+{
+ struct rdma_user_mmap_entry *entry =
+ container_of(kref, struct rdma_user_mmap_entry, ref);
+ struct ib_ucontext *ucontext = entry->ucontext;
+ unsigned long i;
+
+ /*
+ * Erase all entries occupied by this single entry, this is deferred
+ * until all VMA are closed so that the mmap offsets remain unique.
+ */
+ xa_lock(&ucontext->mmap_xa);
+ for (i = 0; i < entry->npages; i++)
+ __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
+ xa_unlock(&ucontext->mmap_xa);
+
+ if (ucontext->device->mmap_free)
+ ucontext->device->mmap_free(entry);
+}
+
+/**
+ * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
+ *
+ * @entry: an entry in the mmap_xa
+ *
+ * This function is called when the mapping is closed if it was
+ * an io mapping or when the driver is done with the entry for
+ * some other reason.
+ * Should be called after rdma_user_mmap_entry_get was called
+ * and entry is no longer needed. This function will erase the
+ * entry and free it if its refcnt reaches zero.
+ */
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
+{
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_put);
+
+/**
+ * rdma_user_mmap_entry_remove() - Drop reference to entry and
+ * mark it as unmmapable
+ *
+ * @entry: the entry to insert into the mmap_xa
+ *
+ * Drivers can call this to prevent userspace from creating more mappings for
+ * entry, however existing mmaps continue to exist and ops->mmap_free() will
+ * not be called until all user mmaps are destroyed.
+ */
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
+{
+ if (!entry)
+ return;
+
+ xa_lock(&entry->ucontext->mmap_xa);
+ entry->driver_removed = true;
+ xa_unlock(&entry->ucontext->mmap_xa);
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
+
+/**
+ * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
+ * in a given range.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ * @min_pgoff: minimum pgoff to be returned
+ * @max_pgoff: maximum pgoff to be returned
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for implementing their mmap syscall A database of mmap offsets is
+ * handled in the core and helper functions are provided to insert entries
+ * into the database and extract entries when the user calls mmap with the
+ * given offset. The function allocates a unique page offset in a given range
+ * that should be provided to user, the user will use the offset to retrieve
+ * information such as address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ u32 xa_first, xa_last, npages;
+ int err;
+ u32 i;
+ u32 j;
+
+ if (!entry)
+ return -EINVAL;
+
+ kref_init(&entry->ref);
+ entry->ucontext = ucontext;
+
+ /*
+ * We want the whole allocation to be done without interruption from a
+ * different thread. The allocation requires finding a free range and
+ * storing. During the xa_insert the lock could be released, possibly
+ * allowing another thread to choose the same range.
+ */
+ mutex_lock(&ufile->umap_lock);
+
+ xa_lock(&ucontext->mmap_xa);
+
+ /* We want to find an empty range */
+ npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
+ entry->npages = npages;
+
+ /* Find an empty range */
+ for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
+ if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
+ if (unlikely(i + j == max_pgoff))
+ break;
+ i = i + j + 1;
+ j = 0;
+ } else {
+ if (unlikely(i + j == max_pgoff))
+ break;
+ j++;
+ }
+ }
+
+ if (j != npages)
+ goto err_unlock;
+
+ xa_first = i;
+ xa_last = i + j;
+
+ for (i = xa_first; i < xa_last; i++) {
+ err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
+ if (err)
+ goto err_undo;
+ }
+
+ /*
+ * Internally the kernel uses a page offset, in libc this is a byte
+ * offset. Drivers should not return pgoff to userspace.
+ */
+ entry->start_pgoff = xa_first;
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+
+ return 0;
+
+err_undo:
+ for (; i > xa_first; i--)
+ __xa_erase(&ucontext->mmap_xa, i - 1);
+
+err_unlock:
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given offset.
+ * The function allocates a unique page offset that should be provided to user,
+ * the user will use the offset to retrieve information such as address to
+ * be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
+ U32_MAX);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
diff --git a/sys/ofed/drivers/infiniband/core/ib_cq.c b/sys/ofed/drivers/infiniband/core/ib_cq.c
--- a/sys/ofed/drivers/infiniband/core/ib_cq.c
+++ b/sys/ofed/drivers/infiniband/core/ib_cq.c
@@ -86,14 +86,17 @@
}
struct ib_cq *
-ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+__ib_alloc_cq_user(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx,
+ const char *caller, struct ib_udata *udata)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
.comp_vector = comp_vector,
};
struct ib_cq *cq;
+ int ret;
/*
* Check for invalid parameters early on to avoid
@@ -108,17 +111,19 @@
return (ERR_PTR(-EINVAL));
}
- cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
- if (IS_ERR(cq))
- return (cq);
+ cq = rdma_zalloc_drv_obj(dev, ib_cq);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
cq->device = dev;
- cq->uobject = NULL;
- cq->event_handler = NULL;
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
+ ret = dev->create_cq(cq, &cq_attr, NULL);
+ if (ret)
+ goto out_free_cq;
+
switch (poll_ctx) {
case IB_POLL_DIRECT:
cq->comp_handler = NULL; /* no hardware completions */
@@ -133,11 +138,15 @@
break;
}
return (cq);
+
+out_free_cq:
+ kfree(cq);
+ return (ERR_PTR(ret));
}
-EXPORT_SYMBOL(ib_alloc_cq);
+EXPORT_SYMBOL(__ib_alloc_cq_user);
void
-ib_free_cq(struct ib_cq *cq)
+ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
if (WARN_ON_ONCE(atomic_read(&cq->usecnt) != 0))
@@ -154,6 +163,7 @@
break;
}
- (void)cq->device->destroy_cq(cq);
+ cq->device->destroy_cq(cq, udata);
+ kfree(cq);
}
-EXPORT_SYMBOL(ib_free_cq);
+EXPORT_SYMBOL(ib_free_cq_user);
diff --git a/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c b/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c
--- a/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c
+++ b/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c
@@ -86,7 +86,7 @@
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- ib_destroy_ah(rmpp_recv->ah);
+ ib_destroy_ah(rmpp_recv->ah, RDMA_DESTROY_AH_SLEEPABLE);
kfree(rmpp_recv);
}
@@ -176,7 +176,7 @@
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- ib_destroy_ah(ah);
+ ib_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -206,7 +206,7 @@
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ ib_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
@@ -214,7 +214,7 @@
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -242,7 +242,7 @@
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ ib_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
diff --git a/sys/ofed/drivers/infiniband/core/ib_rdma_core.c b/sys/ofed/drivers/infiniband/core/ib_rdma_core.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_rdma_core.c
@@ -0,0 +1,943 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm/atomic.h>
+#include <linux/file.h>
+#include <linux/lockdep.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <linux/rcupdate.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+ kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu);
+}
+
+/*
+ * In order to indicate we no longer needs this uobject, uverbs_uobject_put
+ * is called. When the reference count is decreased, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+ kref_put(&uobject->ref, uverbs_uobject_free);
+}
+EXPORT_SYMBOL(uverbs_uobject_put);
+
+static int uverbs_try_lock_object(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+ /*
+ * When a shared access is required, we use a positive counter. Each
+ * shared access request checks that the value != -1 and increment it.
+ * Exclusive access is required for operations like write or destroy.
+ * In exclusive access mode, we check that the counter is zero (nobody
+ * claimed this object) and we set it to -1. Releasing a shared access
+ * lock is done simply by decreasing the counter. As for exclusive
+ * access locks, since only a single one of them is is allowed
+ * concurrently, setting the counter to zero is enough for releasing
+ * this lock.
+ */
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ -EBUSY : 0;
+ case UVERBS_LOOKUP_WRITE:
+ /* lock is exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+ case UVERBS_LOOKUP_DESTROY:
+ return 0;
+ }
+ return 0;
+}
+
+static void assert_uverbs_usecnt(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+#ifdef CONFIG_LOCKDEP
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ WARN_ON(atomic_read(&uobj->usecnt) <= 0);
+ break;
+ case UVERBS_LOOKUP_WRITE:
+ WARN_ON(atomic_read(&uobj->usecnt) != -1);
+ break;
+ case UVERBS_LOOKUP_DESTROY:
+ break;
+ }
+#endif
+}
+
+/*
+ * This must be called with the hw_destroy_rwsem locked for read or write,
+ * also the uobject itself must be locked for write.
+ *
+ * Upon return the HW object is guaranteed to be destroyed.
+ *
+ * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
+ * however the type's allocat_commit function cannot have been called and the
+ * uobject cannot be on the uobjects_lists
+ *
+ * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via
+ * rdma_lookup_get_uobject) and the object is left in a state where the caller
+ * needs to call rdma_lookup_put_uobject.
+ *
+ * For all other destroy modes this function internally unlocks the uobject
+ * and consumes the kref on the uobj.
+ */
+static int uverbs_destroy_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason reason,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ unsigned long flags;
+ int ret;
+
+ lockdep_assert_held(&ufile->hw_destroy_rwsem);
+ assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
+
+ if (reason == RDMA_REMOVE_ABORT) {
+ WARN_ON(!list_empty(&uobj->list));
+ WARN_ON(!uobj->context);
+ uobj->uapi_object->type_class->alloc_abort(uobj);
+ } else if (uobj->object) {
+ ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
+ attrs);
+ if (ret) {
+ if (ib_is_destroy_retryable(ret, reason, uobj))
+ return ret;
+
+ /* Nothing to be done, dangle the memory and move on */
+ WARN(true,
+ "ib_uverbs: failed to remove uobject id %d, driver err=%d",
+ uobj->id, ret);
+ }
+
+ uobj->object = NULL;
+ }
+
+ uobj->context = NULL;
+
+ /*
+ * For DESTROY the usecnt is held write locked, the caller is expected
+ * to put it unlock and put the object when done with it. Only DESTROY
+ * can remove the IDR handle.
+ */
+ if (reason != RDMA_REMOVE_DESTROY)
+ atomic_set(&uobj->usecnt, 0);
+ else
+ uobj->uapi_object->type_class->remove_handle(uobj);
+
+ if (!list_empty(&uobj->list)) {
+ spin_lock_irqsave(&ufile->uobjects_lock, flags);
+ list_del_init(&uobj->list);
+ spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
+
+ /*
+ * Pairs with the get in rdma_alloc_commit_uobject(), could
+ * destroy uobj.
+ */
+ uverbs_uobject_put(uobj);
+ }
+
+ /*
+ * When aborting the stack kref remains owned by the core code, and is
+ * not transferred into the type. Pairs with the get in alloc_uobj
+ */
+ if (reason == RDMA_REMOVE_ABORT)
+ uverbs_uobject_put(uobj);
+
+ return 0;
+}
+
+/*
+ * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
+ * sequence. It should only be used from command callbacks. On success the
+ * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * version requires the caller to have already obtained an
+ * LOOKUP_DESTROY uobject kref.
+ */
+int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ int ret;
+
+ down_read(&ufile->hw_destroy_rwsem);
+
+ ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
+ if (ret)
+ goto out_unlock;
+
+ ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs);
+ if (ret) {
+ atomic_set(&uobj->usecnt, 0);
+ goto out_unlock;
+ }
+
+out_unlock:
+ up_read(&ufile->hw_destroy_rwsem);
+ return ret;
+}
+
+/*
+ * uobj_get_destroy destroys the HW object and returns a handle to the uobj
+ * with a NULL object pointer. The caller must pair this with
+ * uverbs_put_destroy.
+ */
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+ u32 id, struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_DESTROY, attrs);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = uobj_destroy(uobj, attrs);
+ if (ret) {
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+ return ERR_PTR(ret);
+ }
+
+ return uobj;
+}
+
+/*
+ * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success
+ * (negative errno on failure). For use by callers that do not need the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj;
+
+ uobj = __uobj_get_destroy(obj, id, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+ return 0;
+}
+
+/* alloc_uobj must be undone by uverbs_destroy_uobject() */
+static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs,
+ const struct uverbs_api_object *obj)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_uobject *uobj;
+
+ if (!attrs->context) {
+ struct ib_ucontext *ucontext =
+ ib_uverbs_get_ucontext_file(ufile);
+
+ if (IS_ERR(ucontext))
+ return ERR_CAST(ucontext);
+ attrs->context = ucontext;
+ }
+
+ uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
+ if (!uobj)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * user_handle should be filled by the handler,
+ * The object is added to the list in the commit stage.
+ */
+ uobj->ufile = ufile;
+ uobj->context = attrs->context;
+ INIT_LIST_HEAD(&uobj->list);
+ uobj->uapi_object = obj;
+ /*
+ * Allocated objects start out as write locked to deny any other
+ * syscalls from accessing them until they are committed. See
+ * rdma_alloc_commit_uobject
+ */
+ atomic_set(&uobj->usecnt, -1);
+ kref_init(&uobj->ref);
+
+ return uobj;
+}
+
+#define NULL_IB_UOBJECT ((struct ib_uobject *)1)
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+ /*
+ * We start with allocating an idr pointing to NULL. This represents an
+ * object which isn't initialized yet. We'll replace it later on with
+ * the real object once we commit.
+ */
+ return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL_IB_UOBJECT, xa_limit_32b,
+ GFP_KERNEL);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *
+lookup_get_idr_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode)
+{
+ struct ib_uobject *uobj;
+
+ if (id < 0 || id > ULONG_MAX)
+ return ERR_PTR(-EINVAL);
+
+ rcu_read_lock();
+ /*
+ * The idr_find is guaranteed to return a pointer to something that
+ * isn't freed yet, or NULL, as the free after idr_remove goes through
+ * kfree_rcu(). However the object may still have been released and
+ * kfree() could be called at any time.
+ */
+ uobj = xa_load(&ufile->idr, id);
+ if (!uobj || uobj == NULL_IB_UOBJECT || !kref_get_unless_zero(&uobj->ref))
+ uobj = ERR_PTR(-ENOENT);
+ rcu_read_unlock();
+ return uobj;
+}
+
+static struct ib_uobject *
+lookup_get_fd_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode)
+{
+ const struct uverbs_obj_fd_type *fd_type;
+ struct file *f;
+ struct ib_uobject *uobject;
+ int fdno = id;
+
+ if (fdno != id)
+ return ERR_PTR(-EINVAL);
+
+ if (mode != UVERBS_LOOKUP_READ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!obj->type_attrs)
+ return ERR_PTR(-EIO);
+ fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+
+ f = fget(fdno);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running
+ * uverbs_uobject_fd_release(), and the caller is expected to ensure
+ * that release is never done while a call to lookup is possible.
+ */
+ if (f->f_op != fd_type->fops) {
+ fput(f);
+ return ERR_PTR(-EBADF);
+ }
+
+ uverbs_uobject_get(uobject);
+ return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (obj == ERR_PTR(-ENOMSG)) {
+ /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
+ uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
+ } else {
+ if (IS_ERR(obj))
+ return ERR_PTR(-EINVAL);
+
+ uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ if (uobj->uapi_object != obj) {
+ ret = -EINVAL;
+ goto free;
+ }
+ }
+
+ /*
+ * If we have been disassociated block every command except for
+ * DESTROY based commands.
+ */
+ if (mode != UVERBS_LOOKUP_DESTROY &&
+ !srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu)) {
+ ret = -EIO;
+ goto free;
+ }
+
+ ret = uverbs_try_lock_object(uobj, mode);
+ if (ret)
+ goto free;
+ if (attrs)
+ attrs->context = uobj->context;
+
+ return uobj;
+free:
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *
+alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+
+ uobj = alloc_uobj(attrs, obj);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = idr_add_uobj(uobj);
+ if (ret)
+ goto uobj_put;
+
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto remove;
+
+ return uobj;
+
+remove:
+ xa_erase(&attrs->ufile->idr, uobj->id);
+uobj_put:
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *
+alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+ int new_fd;
+ struct ib_uobject *uobj;
+ struct file *filp;
+
+ if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release))
+ return ERR_PTR(-EINVAL);
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0)
+ return ERR_PTR(new_fd);
+
+ uobj = alloc_uobj(attrs, obj);
+ if (IS_ERR(uobj))
+ goto err_fd;
+
+ /* Note that uverbs_uobject_fd_release() is called during abort */
+ filp = alloc_file(fd_type->flags, fd_type->fops);
+ if (IS_ERR(filp)) {
+ uobj = ERR_CAST(filp);
+ goto err_uobj;
+ }
+ uobj->object = filp;
+
+ uobj->id = new_fd;
+ return uobj;
+
+err_uobj:
+ uverbs_uobject_put(uobj);
+err_fd:
+ put_unused_fd(new_fd);
+ return uobj;
+}
+
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_uobject *ret;
+
+ if (IS_ERR(obj))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * The hw_destroy_rwsem is held across the entire object creation and
+ * released during rdma_alloc_commit_uobject or
+ * rdma_alloc_abort_uobject
+ */
+ if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+ return ERR_PTR(-EIO);
+
+ ret = obj->type_class->alloc_begin(obj, attrs);
+ if (IS_ERR(ret)) {
+ up_read(&ufile->hw_destroy_rwsem);
+ return ret;
+ }
+ return ret;
+}
+
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+ xa_erase(&uobj->ufile->idr, uobj->id);
+}
+
+static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->uapi_object->type_attrs,
+ struct uverbs_obj_idr_type, type);
+ int ret = idr_type->destroy_object(uobj, why, attrs);
+
+ /*
+ * We can only fail gracefully if the user requested to destroy the
+ * object or when a retry may be called upon an error.
+ * In the rest of the cases, just remove whatever you can.
+ */
+ if (ib_is_destroy_retryable(ret, why, uobj))
+ return ret;
+
+ if (why == RDMA_REMOVE_ABORT)
+ return 0;
+
+ return 0;
+}
+
+static void remove_handle_idr_uobject(struct ib_uobject *uobj)
+{
+ xa_erase(&uobj->ufile->idr, uobj->id);
+ /* Matches the kref in alloc_commit_idr_uobject */
+ uverbs_uobject_put(uobj);
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+ struct file *filp = uobj->object;
+
+ fput(filp);
+ put_unused_fd(uobj->id);
+}
+
+static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_obj_fd_type *fd_type = container_of(
+ uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
+ int ret = fd_type->destroy_object(uobj, why);
+
+ if (ib_is_destroy_retryable(ret, why, uobj))
+ return ret;
+
+ return 0;
+}
+
+static void remove_handle_fd_uobject(struct ib_uobject *uobj)
+{
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uverbs_file *ufile = uobj->ufile;
+ void *old;
+
+ /*
+ * We already allocated this IDR with a NULL object, so
+ * this shouldn't fail.
+ *
+ * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
+ * It will be put by remove_commit_idr_uobject()
+ */
+ old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
+ WARN_ON(old != NULL_IB_UOBJECT);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ int fd = uobj->id;
+ struct file *filp = uobj->object;
+
+ /* Matching put will be done in uverbs_uobject_fd_release() */
+ kref_get(&uobj->ufile->ref);
+
+ /* This shouldn't be used anymore. Use the file object instead */
+ uobj->id = 0;
+
+ /*
+ * NOTE: Once we install the file we loose ownership of our kref on
+ * uobj. It will be put by uverbs_uobject_fd_release()
+ */
+ filp->private_data = uobj;
+ fd_install(fd, filp);
+}
+
+/*
+ * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
+ * caller can no longer assume uobj is valid. If this function fails it
+ * destroys the uboject, including the attached HW object.
+ */
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+
+ /* alloc_commit consumes the uobj kref */
+ uobj->uapi_object->type_class->alloc_commit(uobj);
+
+ /* kref is held so long as the uobj is on the uobj list. */
+ uverbs_uobject_get(uobj);
+ spin_lock_irq(&ufile->uobjects_lock);
+ list_add(&uobj->list, &ufile->uobjects);
+ spin_unlock_irq(&ufile->uobjects_lock);
+
+ /* matches atomic_set(-1) in alloc_uobj */
+ atomic_set(&uobj->usecnt, 0);
+
+ /* Matches the down_read in rdma_alloc_begin_uobject */
+ up_read(&ufile->hw_destroy_rwsem);
+}
+
+/*
+ * This consumes the kref for uobj. It is up to the caller to unwind the HW
+ * object and anything else connected to uobj before calling this.
+ */
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = uobj->ufile;
+
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
+
+ /* Matches the down_read in rdma_alloc_begin_uobject */
+ up_read(&ufile->hw_destroy_rwsem);
+}
+
+static void lookup_put_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+ struct file *filp = uobj->object;
+
+ WARN_ON(mode != UVERBS_LOOKUP_READ);
+ /*
+ * This indirectly calls uverbs_uobject_fd_release() and free the
+ * object
+ */
+ fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+ assert_uverbs_usecnt(uobj, mode);
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
+ /*
+ * In order to unlock an object, either decrease its usecnt for
+ * read access or zero it in case of exclusive access. See
+ * uverbs_try_lock_object for locking schema information.
+ */
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ atomic_dec(&uobj->usecnt);
+ break;
+ case UVERBS_LOOKUP_WRITE:
+ atomic_set(&uobj->usecnt, 0);
+ break;
+ case UVERBS_LOOKUP_DESTROY:
+ break;
+ }
+
+ /* Pairs with the kref obtained by type->lookup_get */
+ uverbs_uobject_put(uobj);
+}
+
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+ xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
+}
+
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+ struct ib_uobject *entry;
+ unsigned long id;
+
+ /*
+ * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
+ * there are no HW objects left, however the xarray is still populated
+ * with anything that has not been cleaned up by userspace. Since the
+ * kref on ufile is 0, nothing is allowed to call lookup_get.
+ *
+ * This is an optimized equivalent to remove_handle_idr_uobject
+ */
+ xa_for_each(&ufile->idr, id, entry) {
+ WARN_ON(entry->object);
+ uverbs_uobject_put(entry);
+ }
+
+ xa_destroy(&ufile->idr);
+}
+
+const struct uverbs_obj_type_class uverbs_idr_class = {
+ .alloc_begin = alloc_begin_idr_uobject,
+ .lookup_get = lookup_get_idr_uobject,
+ .alloc_commit = alloc_commit_idr_uobject,
+ .alloc_abort = alloc_abort_idr_uobject,
+ .lookup_put = lookup_put_idr_uobject,
+ .destroy_hw = destroy_hw_idr_uobject,
+ .remove_handle = remove_handle_idr_uobject,
+};
+EXPORT_SYMBOL(uverbs_idr_class);
+
+/*
+ * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
+ * file_operations release method.
+ */
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_file *ufile;
+ struct ib_uobject *uobj;
+
+ /*
+ * This can only happen if the fput came from alloc_abort_fd_uobject()
+ */
+ if (!filp->private_data)
+ return 0;
+ uobj = filp->private_data;
+ ufile = uobj->ufile;
+
+ if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+ struct uverbs_attr_bundle attrs = {
+ .context = uobj->context,
+ .ufile = ufile,
+ };
+
+ /*
+ * lookup_get_fd_uobject holds the kref on the struct file any
+ * time a FD uobj is locked, which prevents this release
+ * method from being invoked. Meaning we can always get the
+ * write lock here, or we have a kernel bug.
+ */
+ WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs);
+ up_read(&ufile->hw_destroy_rwsem);
+ }
+
+ /* Matches the get in alloc_commit_fd_uobject() */
+ kref_put(&ufile->ref, ib_uverbs_release_file);
+
+ /* Pairs with filp->private_data in alloc_begin_fd_uobject */
+ uverbs_uobject_put(uobj);
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_uobject_fd_release);
+
+/*
+ * Drop the ucontext off the ufile and completely disconnect it from the
+ * ib_device
+ */
+static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
+{
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *ib_dev = ucontext->device;
+
+ /*
+ * If we are closing the FD then the user mmap VMAs must have
+ * already been destroyed as they hold on to the filep, otherwise
+ * they need to be zap'd.
+ */
+ if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
+ uverbs_user_mmap_disassociate(ufile);
+ if (ib_dev->disassociate_ucontext)
+ ib_dev->disassociate_ucontext(ucontext);
+ }
+
+ ib_dev->dealloc_ucontext(ucontext);
+ WARN_ON(!xa_empty(&ucontext->mmap_xa));
+ kfree(ucontext);
+
+ ufile->ucontext = NULL;
+}
+
+static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
+{
+ struct ib_uobject *obj, *next_obj;
+ int ret = -EINVAL;
+ struct uverbs_attr_bundle attrs = { .ufile = ufile };
+
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per traversal in order to let
+ * other threads (which might still use the FDs) chance to run.
+ */
+ list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
+ attrs.context = obj->context;
+ /*
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
+ */
+ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+ if (!uverbs_destroy_uobject(obj, reason, &attrs))
+ ret = 0;
+ else
+ atomic_set(&obj->usecnt, 0);
+ }
+ return ret;
+}
+
+/*
+ * Destroy the uncontext and every uobject associated with it.
+ *
+ * This is internally locked and can be called in parallel from multiple
+ * contexts.
+ */
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
+{
+ down_write(&ufile->hw_destroy_rwsem);
+
+ /*
+ * If a ucontext was never created then we can't have any uobjects to
+ * cleanup, nothing to do.
+ */
+ if (!ufile->ucontext)
+ goto done;
+
+ ufile->ucontext->closing = true;
+ ufile->ucontext->cleanup_retryable = true;
+ while (!list_empty(&ufile->uobjects))
+ if (__uverbs_cleanup_ufile(ufile, reason)) {
+ /*
+ * No entry was cleaned-up successfully during this
+ * iteration
+ */
+ break;
+ }
+
+ ufile->ucontext->cleanup_retryable = false;
+ if (!list_empty(&ufile->uobjects))
+ __uverbs_cleanup_ufile(ufile, reason);
+
+ ufile_destroy_ucontext(ufile, reason);
+
+done:
+ up_write(&ufile->hw_destroy_rwsem);
+}
+
+const struct uverbs_obj_type_class uverbs_fd_class = {
+ .alloc_begin = alloc_begin_fd_uobject,
+ .lookup_get = lookup_get_fd_uobject,
+ .alloc_commit = alloc_commit_fd_uobject,
+ .alloc_abort = alloc_abort_fd_uobject,
+ .lookup_put = lookup_put_fd_uobject,
+ .destroy_hw = destroy_hw_fd_uobject,
+ .remove_handle = remove_handle_fd_uobject,
+};
+EXPORT_SYMBOL(uverbs_fd_class);
+
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
+ s64 id, struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_api_object *obj =
+ uapi_get_object(attrs->ufile->device->uapi, object_id);
+
+ switch (access) {
+ case UVERBS_ACCESS_READ:
+ return rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_READ, attrs);
+ case UVERBS_ACCESS_DESTROY:
+ /* Actual destruction is done inside uverbs_handle_method */
+ return rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_DESTROY, attrs);
+ case UVERBS_ACCESS_WRITE:
+ return rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_WRITE, attrs);
+ case UVERBS_ACCESS_NEW:
+ return rdma_alloc_begin_uobject(obj, attrs);
+ default:
+ WARN_ON(true);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool commit,
+ struct uverbs_attr_bundle *attrs)
+{
+ /*
+ * refcounts should be handled at the object level and not at the
+ * uobject level. Refcounts of the objects themselves are done in
+ * handlers.
+ */
+
+ switch (access) {
+ case UVERBS_ACCESS_READ:
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
+ break;
+ case UVERBS_ACCESS_WRITE:
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+ break;
+ case UVERBS_ACCESS_DESTROY:
+ if (uobj)
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+ break;
+ case UVERBS_ACCESS_NEW:
+ if (commit)
+ rdma_alloc_commit_uobject(uobj, attrs);
+ else
+ rdma_alloc_abort_uobject(uobj, attrs);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
diff --git a/sys/ofed/drivers/infiniband/core/ib_sa_query.c b/sys/ofed/drivers/infiniband/core/ib_sa_query.c
--- a/sys/ofed/drivers/infiniband/core/ib_sa_query.c
+++ b/sys/ofed/drivers/infiniband/core/ib_sa_query.c
@@ -495,7 +495,7 @@
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- ib_destroy_ah(sm_ah->ah);
+ ib_destroy_ah(sm_ah->ah, 0);
kfree(sm_ah);
}
@@ -535,7 +535,7 @@
ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
}
- new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
+ new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
if (IS_ERR(new_ah->ah)) {
pr_warn("Couldn't create new SM AH\n");
kfree(new_ah);
diff --git a/sys/ofed/drivers/infiniband/core/ib_user_mad.c b/sys/ofed/drivers/infiniband/core/ib_user_mad.c
--- a/sys/ofed/drivers/infiniband/core/ib_user_mad.c
+++ b/sys/ofed/drivers/infiniband/core/ib_user_mad.c
@@ -201,7 +201,7 @@
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- ib_destroy_ah(packet->msg->ah);
+ ib_destroy_ah(packet->msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -509,7 +509,7 @@
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
}
- ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ ah = ib_create_user_ah(agent->qp->pd, &ah_attr, NULL);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_up;
@@ -603,7 +603,7 @@
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(ah);
+ ib_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err_up:
mutex_unlock(&file->mutex);
err:
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c
--- a/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c
@@ -40,392 +40,291 @@
#define LINUXKPI_PARAM_PREFIX ibcore_
+#include <sys/priv.h>
+
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
-#include <linux/rbtree.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
#include "uverbs.h"
#include "core_priv.h"
-#include <sys/priv.h>
-
-struct uverbs_lock_class {
- char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr write operations. When an object is
- * looked up, a reference must be taken on the object's kref before
- * dropping this lock. For read operations, the rcu_read_lock()
- * and rcu_write_lock() but similarly the kref reference is grabbed
- * before the rcu_read_unlock().
+ * Copy a response to userspace. If the provided 'resp' is larger than the
+ * user buffer it is silently truncated. If the user provided a larger buffer
+ * then the trailing portion is zero filled.
*
- * - Each object also has an rwsem. This rwsem must be held for
- * reading while an operation that uses the object is performed.
- * For example, while registering an MR, the associated PD's
- * uobject.mutex must be held for reading. The rwsem must be held
- * for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- * set, then lookups of the object will fail even if it is found in
- * the idr. This handles a reader that blocks and does not acquire
- * the rwsem until after the object is destroyed. The destroy
- * operation will set the live flag to 0 and then drop the rwsem;
- * this will allow the reader to acquire the rwsem, see that the
- * live flag is 0, and then drop the rwsem and its reference to
- * object. The underlying storage will not be freed until the last
- * reference to the object is dropped.
+ * These semantics are intended to support future extension of the output
+ * structures.
*/
-
-static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct uverbs_lock_class *c)
-{
- uobj->user_handle = user_handle;
- uobj->context = context;
- kref_init(&uobj->ref);
- init_rwsem(&uobj->mutex);
- uobj->live = 0;
-}
-
-static void release_uobj(struct kref *kref)
-{
- kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
-}
-
-static void put_uobj(struct ib_uobject *uobj)
-{
- kref_put(&uobj->ref, release_uobj);
-}
-
-static void put_uobj_read(struct ib_uobject *uobj)
-{
- up_read(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static void put_uobj_write(struct ib_uobject *uobj)
-{
- up_write(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
+static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp,
+ size_t resp_len)
{
int ret;
- idr_preload(GFP_KERNEL);
- spin_lock(&ib_uverbs_idr_lock);
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT))
+ return uverbs_copy_to_struct_or_zero(
+ attrs, UVERBS_ATTR_CORE_OUT, resp, resp_len);
- ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
- if (ret >= 0)
- uobj->id = ret;
-
- spin_unlock(&ib_uverbs_idr_lock);
- idr_preload_end();
-
- return ret < 0 ? ret : 0;
-}
-
-void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- spin_lock(&ib_uverbs_idr_lock);
- idr_remove(idr, uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
-}
-
-static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- rcu_read_lock();
- uobj = idr_find(idr, id);
- if (uobj) {
- if (uobj->context == context)
- kref_get(&uobj->ref);
- else
- uobj = NULL;
- }
- rcu_read_unlock();
-
- return uobj;
-}
-
-static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
- struct ib_ucontext *context, int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
+ if (copy_to_user(attrs->ucore.outbuf, resp,
+ min(attrs->ucore.outlen, resp_len)))
+ return -EFAULT;
- if (nested)
- down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
- else
- down_read(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_read(uobj);
- return NULL;
+ if (resp_len < attrs->ucore.outlen) {
+ /*
+ * Zero fill any extra memory that user
+ * space might have provided.
+ */
+ ret = clear_user(attrs->ucore.outbuf + resp_len,
+ attrs->ucore.outlen - resp_len);
+ if (ret)
+ return -EFAULT;
}
- return uobj;
+ return 0;
}
-static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
+/*
+ * Copy a request from userspace. If the provided 'req' is larger than the
+ * user buffer then the user buffer is zero extended into the 'req'. If 'req'
+ * is smaller than the user buffer then the uncopied bytes in the user buffer
+ * must be zero.
+ */
+static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req,
+ size_t req_len)
{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
+ if (copy_from_user(req, attrs->ucore.inbuf,
+ min(attrs->ucore.inlen, req_len)))
+ return -EFAULT;
- down_write(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_write(uobj);
- return NULL;
+ if (attrs->ucore.inlen < req_len) {
+ memset((u8 *)req + attrs->ucore.inlen, 0,
+ req_len - attrs->ucore.inlen);
+ } else if (attrs->ucore.inlen > req_len) {
+ if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len,
+ attrs->ucore.inlen - req_len))
+ return -EOPNOTSUPP;
}
-
- return uobj;
+ return 0;
}
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
- int nested)
+/*
+ * Generate the value for the 'response_length' protocol used by write_ex.
+ * This is the number of bytes the kernel actually wrote. Userspace can use
+ * this to detect what structure members in the response the kernel
+ * understood.
+ */
+static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs,
+ size_t resp_len)
{
- struct ib_uobject *uobj;
-
- uobj = idr_read_uobj(idr, id, context, nested);
- return uobj ? uobj->object : NULL;
+ return min_t(size_t, attrs->ucore.outlen, resp_len);
}
-static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
-}
+/*
+ * The iterator version of the request interface is for handlers that need to
+ * step over a flex array at the end of a command header.
+ */
+struct uverbs_req_iter {
+ const u8 __user *cur;
+ const u8 __user *end;
+};
-static void put_pd_read(struct ib_pd *pd)
+static int uverbs_request_start(struct uverbs_attr_bundle *attrs,
+ struct uverbs_req_iter *iter,
+ void *req,
+ size_t req_len)
{
- put_uobj_read(pd->uobject);
-}
+ if (attrs->ucore.inlen < req_len)
+ return -ENOSPC;
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
-{
- return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
-}
+ if (copy_from_user(req, attrs->ucore.inbuf, req_len))
+ return -EFAULT;
-static void put_cq_read(struct ib_cq *cq)
-{
- put_uobj_read(cq->uobject);
+ iter->cur = attrs->ucore.inbuf + req_len;
+ iter->end = attrs->ucore.inbuf + attrs->ucore.inlen;
+ return 0;
}
-static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
+static int uverbs_request_next(struct uverbs_req_iter *iter, void *val,
+ size_t len)
{
- return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
-}
+ if (iter->cur + len > iter->end)
+ return -ENOSPC;
-static void put_ah_read(struct ib_ah *ah)
-{
- put_uobj_read(ah->uobject);
-}
+ if (copy_from_user(val, iter->cur, len))
+ return -EFAULT;
-static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
+ iter->cur += len;
+ return 0;
}
-static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
+static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter,
+ size_t len)
{
- return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
-}
+ const void __user *res = iter->cur;
-static void put_wq_read(struct ib_wq *wq)
-{
- put_uobj_read(wq->uobject);
+ if (iter->cur + len > iter->end)
+ return (void __force __user *)ERR_PTR(-ENOSPC);
+ iter->cur += len;
+ return res;
}
-static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
- struct ib_ucontext *context)
+static int uverbs_request_finish(struct uverbs_req_iter *iter)
{
- return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
+ if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur))
+ return -EOPNOTSUPP;
+ return 0;
}
-static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
+/*
+ * When calling a destroy function during an error unwind we need to pass in
+ * the udata that is sanitized of all user arguments. Ie from the driver
+ * perspective it looks like no udata was passed.
+ */
+struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs)
{
- put_uobj_read(ind_table->uobject);
+ attrs->driver_udata = (struct ib_udata){};
+ return &attrs->driver_udata;
}
-static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
+static struct ib_uverbs_completion_event_file *
+_ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
{
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
+ fd, attrs);
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
- return uobj ? uobj->object : NULL;
-}
+ if (IS_ERR(uobj))
+ return (void *)uobj;
-static void put_qp_read(struct ib_qp *qp)
-{
- put_uobj_read(qp->uobject);
-}
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
-static void put_qp_write(struct ib_qp *qp)
-{
- put_uobj_write(qp->uobject);
+ return container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
}
+#define ib_uverbs_lookup_comp_file(_fd, _ufile) ({ \
+ CTASSERT(sizeof(_fd) == sizeof(s32)); \
+ _ib_uverbs_lookup_comp_file(_fd, _ufile); \
+})
-static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
{
- return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
-}
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
-static void put_srq_read(struct ib_srq *srq)
-{
- put_uobj_read(srq->uobject);
-}
+ ib_dev = srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu);
+ if (!ib_dev)
+ return -EIO;
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
- struct ib_uobject **uobj)
-{
- *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
- return *uobj ? (*uobj)->object : NULL;
-}
+ ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
+ if (!ucontext)
+ return -ENOMEM;
-static void put_xrcd_read(struct ib_uobject *uobj)
-{
- put_uobj_read(uobj);
+ ucontext->device = ib_dev;
+ ucontext->ufile = ufile;
+ xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+ attrs->context = ucontext;
+ return 0;
}
-ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_get_context cmd;
- struct ib_uverbs_get_context_resp resp;
- struct ib_udata udata;
- struct ib_ucontext *ucontext;
- struct file *filp;
+ struct ib_ucontext *ucontext = attrs->context;
+ struct ib_uverbs_file *file = attrs->ufile;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- mutex_lock(&file->mutex);
-
+ if (!down_read_trylock(&file->hw_destroy_rwsem))
+ return -EIO;
+ mutex_lock(&file->ucontext_lock);
if (file->ucontext) {
ret = -EINVAL;
goto err;
}
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd, out_len - sizeof resp);
-
- ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
- if (IS_ERR(ucontext)) {
- ret = PTR_ERR(ucontext);
- goto err;
- }
+ ret = ucontext->device->alloc_ucontext(ucontext,
+ &attrs->driver_udata);
+ if (ret)
+ goto err_uncharge;
- ucontext->device = ib_dev;
- INIT_LIST_HEAD(&ucontext->pd_list);
- INIT_LIST_HEAD(&ucontext->mr_list);
- INIT_LIST_HEAD(&ucontext->mw_list);
- INIT_LIST_HEAD(&ucontext->cq_list);
- INIT_LIST_HEAD(&ucontext->qp_list);
- INIT_LIST_HEAD(&ucontext->srq_list);
- INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->wq_list);
- INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
- INIT_LIST_HEAD(&ucontext->xrcd_list);
- INIT_LIST_HEAD(&ucontext->rule_list);
- rcu_read_lock();
- ucontext->tgid = get_pid(task_pid_group_leader(current));
- rcu_read_unlock();
- ucontext->closing = 0;
- ucontext->cleanup_retryable = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- ucontext->umem_tree = RB_ROOT;
- init_rwsem(&ucontext->umem_rwsem);
- ucontext->odp_mrs_count = 0;
- INIT_LIST_HEAD(&ucontext->no_private_counters);
-
- if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
- ucontext->invalidate_range = NULL;
-
-#endif
-
- resp.num_comp_vectors = file->device->num_comp_vectors;
-
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- goto err_free;
- resp.async_fd = ret;
+ /*
+ * Make sure that ib_uverbs_get_ucontext() sees the pointer update
+ * only after all writes to setup the ucontext have completed
+ */
+ atomic_store_rel_ptr((uintptr_t *)&file->ucontext, (uintptr_t)ucontext);
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto err_fd;
- }
+ mutex_unlock(&file->ucontext_lock);
+ up_read(&file->hw_destroy_rwsem);
+ return 0;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_file;
- }
+err_uncharge:
+err:
+ mutex_unlock(&file->ucontext_lock);
+ up_read(&file->hw_destroy_rwsem);
+ return ret;
+}
- file->ucontext = ucontext;
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_get_context_resp resp;
+ struct ib_uverbs_get_context cmd;
+ struct ib_device *ib_dev;
+ struct ib_uobject *uobj;
+ int ret;
- fd_install(resp.async_fd, filp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- mutex_unlock(&file->mutex);
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
- return in_len;
+ uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ ret = PTR_ERR(uobj);
+ goto err_ucontext;
+ }
-err_file:
- ib_uverbs_free_async_event_file(file);
- fput(filp);
+ resp = (struct ib_uverbs_get_context_resp){
+ .num_comp_vectors = attrs->ufile->device->num_comp_vectors,
+ .async_fd = uobj->id,
+ };
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
+ goto err_uobj;
-err_fd:
- put_unused_fd(resp.async_fd);
+ ret = ib_init_ucontext(attrs);
+ if (ret)
+ goto err_uobj;
-err_free:
- put_pid(ucontext->tgid);
- ib_dev->dealloc_ucontext(ucontext);
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
-err:
- mutex_unlock(&file->mutex);
+err_uobj:
+ rdma_alloc_abort_uobject(uobj, attrs);
+err_ucontext:
+ kfree(attrs->context);
+ attrs->context = NULL;
return ret;
}
-static void copy_query_dev_fields(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
+static void copy_query_dev_fields(struct ib_ucontext *ucontext,
struct ib_uverbs_query_device_resp *resp,
struct ib_device_attr *attr)
{
+ struct ib_device *ib_dev = ucontext->device;
+
resp->fw_ver = attr->fw_ver;
resp->node_guid = ib_dev->node_guid;
resp->sys_image_guid = attr->sys_image_guid;
@@ -436,8 +335,8 @@
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
- resp->device_cap_flags = (u32)(attr->device_cap_flags);
- resp->max_sge = attr->max_sge;
+ resp->device_cap_flags = (u32)attr->device_cap_flags;
+ resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
resp->max_cqe = attr->max_cqe;
@@ -468,113 +367,75 @@
resp->phys_port_cnt = ib_dev->phys_port_cnt;
}
-ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
+ struct ib_ucontext *ucontext;
+ int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&resp, 0, sizeof resp);
- copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
+ copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_port cmd;
struct ib_uverbs_query_port_resp resp;
struct ib_port_attr attr;
int ret;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
ret = ib_query_port(ib_dev, cmd.port_num, &attr);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
+ copy_port_attr_to_resp(&attr, &resp, ib_dev, cmd.port_num);
- resp.state = attr.state;
- resp.max_mtu = attr.max_mtu;
- resp.active_mtu = attr.active_mtu;
- resp.gid_tbl_len = attr.gid_tbl_len;
- resp.port_cap_flags = attr.port_cap_flags;
- resp.max_msg_sz = attr.max_msg_sz;
- resp.bad_pkey_cntr = attr.bad_pkey_cntr;
- resp.qkey_viol_cntr = attr.qkey_viol_cntr;
- resp.pkey_tbl_len = attr.pkey_tbl_len;
- resp.lid = attr.lid;
- resp.sm_lid = attr.sm_lid;
- resp.lmc = attr.lmc;
- resp.max_vl_num = attr.max_vl_num;
- resp.sm_sl = attr.sm_sl;
- resp.subnet_timeout = attr.subnet_timeout;
- resp.init_type_reply = attr.init_type_reply;
- resp.active_width = attr.active_width;
- resp.active_speed = attr.active_speed;
- resp.phys_state = attr.phys_state;
- resp.link_layer = rdma_port_get_link_layer(ib_dev,
- cmd.port_num);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_pd cmd;
struct ib_uverbs_alloc_pd_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd, out_len - sizeof resp);
-
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
- if (IS_ERR(pd)) {
- ret = PTR_ERR(pd);
+ pd = rdma_zalloc_drv_obj(ib_dev, ib_pd);
+ if (!pd) {
+ ret = -ENOMEM;
goto err;
}
@@ -583,85 +444,41 @@
pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
- uobj->object = pd;
- ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
+ ret = ib_dev->alloc_pd(pd, &attrs->driver_udata);
if (ret)
- goto err_idr;
+ goto err_alloc;
+ uobj->object = pd;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
- up_write(&uobj->mutex);
-
- return in_len;
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
-err_idr:
- ib_dealloc_pd(pd);
-
+ ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs));
+ pd = NULL;
+err_alloc:
+ kfree(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_pd cmd;
- struct ib_uobject *uobj;
- struct ib_pd *pd;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- pd = uobj->object;
-
- if (atomic_read(&pd->usecnt)) {
- ret = -EBUSY;
- goto err_put;
- }
+ int ret;
- ret = pd->device->dealloc_pd(uobj->object);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
- goto err_put;
-
- uobj->live = 0;
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return ret;
-err_put:
- put_uobj_write(uobj);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
}
struct xrcd_table_entry {
@@ -749,31 +566,23 @@
}
}
-ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_device *ibudev = attrs->ufile->device;
struct ib_uverbs_open_xrcd cmd;
struct ib_uverbs_open_xrcd_resp resp;
- struct ib_udata udata;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
struct vnode *vnode = NULL;
int ret = 0;
int new_xrcd = 0;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd, out_len - sizeof resp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- mutex_lock(&file->device->xrcd_tree_mutex);
+ mutex_lock(&ibudev->xrcd_tree_mutex);
if (cmd.fd != -1) {
/* search for file descriptor */
@@ -781,7 +590,7 @@
if (ret != 0)
goto err_tree_mutex_unlock;
- xrcd = find_xrcd(file->device, vnode);
+ xrcd = find_xrcd(ibudev, vnode);
if (!xrcd && !(cmd.oflags & O_CREAT)) {
/* no file descriptor. Need CREATE flag */
ret = -EAGAIN;
@@ -794,18 +603,15 @@
}
}
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj) {
- ret = -ENOMEM;
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs,
+ &ib_dev);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
-
- down_write(&obj->uobject.mutex);
-
if (!xrcd) {
- xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
+ xrcd = ib_dev->alloc_xrcd(ib_dev, &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
@@ -821,167 +627,103 @@
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.xrcd_handle = obj->uobject.id;
if (vnode != NULL) {
if (new_xrcd) {
/* create new vnode/xrcd table entry */
- ret = xrcd_table_insert(file->device, vnode, xrcd);
+ ret = xrcd_table_insert(ibudev, vnode, xrcd);
if (ret)
- goto err_insert_xrcd;
+ goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
if (vnode != NULL)
vrele(vnode);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return in_len;
+ rdma_alloc_commit_uobject(&obj->uobject, attrs);
+ return 0;
err_copy:
if (vnode != NULL) {
if (new_xrcd)
- xrcd_table_delete(file->device, vnode);
+ xrcd_table_delete(ibudev, vnode);
atomic_dec(&xrcd->usecnt);
}
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
-
-err_idr:
- ib_dealloc_xrcd(xrcd);
+err_dealloc_xrcd:
+ ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs));
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject, attrs);
err_tree_mutex_unlock:
if (vnode != NULL)
vrele(vnode);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
return ret;
}
-ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_close_xrcd cmd;
- struct ib_uobject *uobj;
- struct ib_xrcd *xrcd = NULL;
- struct vnode *vnode = NULL;
- struct ib_uxrcd_object *obj;
- int live;
- int ret = 0;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj) {
- ret = -EINVAL;
- goto out;
- }
-
- xrcd = uobj->object;
- vnode = xrcd->vnode;
- obj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (atomic_read(&obj->refcnt)) {
- put_uobj_write(uobj);
- ret = -EBUSY;
- goto out;
- }
-
- if (!vnode || atomic_dec_and_test(&xrcd->usecnt)) {
- ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
- }
-
- live = uobj->live;
- if (vnode && ret)
- atomic_inc(&xrcd->usecnt);
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
- goto out;
-
- if (vnode && !live)
- xrcd_table_delete(file->device, vnode);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- ret = in_len;
+ return ret;
-out:
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs);
}
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
- struct ib_xrcd *xrcd)
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
{
struct vnode *vnode;
+ int ret;
+ struct ib_uverbs_device *dev = attrs->ufile->device;
vnode = xrcd->vnode;
if (vnode && !atomic_dec_and_test(&xrcd->usecnt))
- return;
+ return 0;
- ib_dealloc_xrcd(xrcd);
+ ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata);
+
+ if (ib_is_destroy_retryable(ret, why, uobject)) {
+ atomic_inc(&xrcd->usecnt);
+ return ret;
+ }
if (vnode)
xrcd_table_delete(dev, vnode);
+
+ return ret;
}
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd, out_len - sizeof resp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
@@ -990,14 +732,11 @@
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1013,7 +752,8 @@
}
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+ cmd.access_flags,
+ &attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -1021,74 +761,52 @@
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->dm = NULL;
+ mr->sig_attrs = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
resp.mr_handle = uobj->id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
-
- put_pd_read(pd);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(pd);
- uobj->live = 1;
-
- up_write(&uobj->mutex);
-
- return in_len;
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
- ib_dereg_mr(mr);
+ ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_udata udata;
struct ib_pd *pd = NULL;
struct ib_mr *mr;
struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response + sizeof(resp)),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
return -EINVAL;
@@ -1098,14 +816,17 @@
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
- file->ucontext);
-
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
mr = uobj->object;
+ if (mr->dm) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+
if (cmd.flags & IB_MR_REREG_ACCESS) {
ret = ib_check_mr_access(cmd.access_flags);
if (ret)
@@ -1113,7 +834,8 @@
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ attrs);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -1122,116 +844,76 @@
old_pd = mr->pd;
ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd, &udata);
- if (!ret) {
- if (cmd.flags & IB_MR_REREG_PD) {
- atomic_inc(&pd->usecnt);
- mr->pd = pd;
- atomic_dec(&old_pd->usecnt);
- }
- } else {
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd,
+ &attrs->driver_udata);
+ if (ret)
goto put_uobj_pd;
+
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_inc(&pd->usecnt);
+ mr->pd = pd;
+ atomic_dec(&old_pd->usecnt);
}
memset(&resp, 0, sizeof(resp));
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
- ret = -EFAULT;
- else
- ret = in_len;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
put_uobjs:
-
- put_uobj_write(mr->uobject);
+ uobj_put_write(uobj);
return ret;
}
-ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mr = uobj->object;
-
- ret = ib_dereg_mr(mr);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
}
-ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
struct ib_uverbs_alloc_mw_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
- struct ib_udata udata;
int ret;
+ struct ib_device *ib_dev;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response + sizeof(resp)),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ if (cmd.mw_type != IB_MW_TYPE_1 && cmd.mw_type != IB_MW_TYPE_2) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
+ mw = pd->device->alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
goto err_put;
@@ -1243,264 +925,162 @@
atomic_inc(&pd->usecnt);
uobj->object = mw;
- ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
- if (ret)
- goto err_unalloc;
memset(&resp, 0, sizeof(resp));
resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp))) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
-
- put_pd_read(pd);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mw_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
-
- return in_len;
+ uobj_put_obj_read(pd);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
-err_unalloc:
uverbs_dealloc_mw(mw);
-
err_put:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_mw *mw;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mw = uobj->object;
-
- ret = uverbs_dealloc_mw(mw);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs);
}
-ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
+ struct ib_uobject *uobj;
+ struct ib_uverbs_completion_event_file *ev_file;
+ struct ib_device *ib_dev;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- return ret;
- resp.fd = ret;
+ resp.fd = uobj->id;
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
- }
+ ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
- return -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret) {
+ uobj_alloc_abort(uobj, attrs);
+ return ret;
}
- fd_install(resp.fd, filp);
- return in_len;
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
}
-static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_cq *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *udata,
- void *context),
- void *context)
+static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
struct ib_uverbs_ex_create_cq_resp resp;
struct ib_cq_init_attr attr = {};
+ struct ib_device *ib_dev;
- if (cmd->comp_vector >= file->device->num_comp_vectors)
+ if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return ERR_PTR(-ENOMEM);
-
- init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
- down_write(&obj->uobject.mutex);
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return obj;
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
- if (!ev_file) {
- ret = -EINVAL;
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
+ if (IS_ERR(ev_file)) {
+ ret = PTR_ERR(ev_file);
goto err;
}
}
- obj->uverbs_file = file;
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
+ attr.flags = cmd->flags;
- if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
- attr.flags = cmd->flags;
-
- cq = ib_dev->create_cq(ib_dev, &attr,
- file->ucontext, uhw);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
goto err_file;
}
-
cq->device = ib_dev;
- cq->uobject = &obj->uobject;
+ cq->uobject = obj;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file;
+ cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- obj->uobject.object = cq;
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
+ ret = ib_dev->create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
goto err_free;
+ obj->uevent.uobject.object = cq;
memset(&resp, 0, sizeof resp);
- resp.base.cq_handle = obj->uobject.id;
+ resp.base.cq_handle = obj->uevent.uobject.id;
resp.base.cqe = cq->cqe;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
-
- ret = cb(file, obj, &resp, ucore, context);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
-
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
return obj;
err_cb:
- idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
-
+ ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
+ cq = NULL;
err_free:
- ib_destroy_cq(cq);
-
+ kfree(cq);
err_file:
if (ev_file)
- ib_uverbs_release_ucq(file, ev_file, obj);
+ ib_uverbs_release_ucq(ev_file, obj);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ERR_PTR(ret);
}
-static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_uverbs_create_cq_resp resp;
- struct ib_udata ucore;
- struct ib_udata uhw;
struct ib_ucq_object *obj;
+ int ret;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&ucore, buf,
- u64_to_user_ptr(cmd.response), sizeof(cmd), sizeof(resp));
-
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response + sizeof(resp)),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1508,44 +1088,19 @@
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), comp_channel) +
- sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
- NULL);
-
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return in_len;
-}
-
-static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
+ obj = create_cq(attrs, &cmd_ex);
+ return PTR_ERR_OR_ZERO(obj);
}
-int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_cq_resp resp;
struct ib_uverbs_ex_create_cq cmd;
struct ib_ucq_object *obj;
- int err;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask)
return -EINVAL;
@@ -1553,59 +1108,41 @@
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_cq_cb, NULL);
-
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return 0;
+ obj = create_cq(attrs, &cmd);
+ return PTR_ERR_OR_ZERO(obj);
}
-ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_resize_cq cmd;
- struct ib_uverbs_resize_cq_resp resp;
- struct ib_udata udata;
+ struct ib_uverbs_resize_cq_resp resp = {};
struct ib_cq *cq;
int ret = -EINVAL;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd, out_len - sizeof resp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
- ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+ ret = cq->device->resize_cq(cq, cmd.cqe, &attrs->driver_udata);
if (ret)
goto out;
resp.cqe = cq->cqe;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp.cqe))
- ret = -EFAULT;
-
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
- put_cq_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- return ret ? ret : in_len;
+ return ret;
}
-static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
+static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
+ struct ib_wc *wc)
{
struct ib_uverbs_wc tmp;
@@ -1614,7 +1151,7 @@
tmp.opcode = wc->opcode;
tmp.vendor_err = wc->vendor_err;
tmp.byte_len = wc->byte_len;
- tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+ tmp.ex.imm_data = wc->ex.imm_data;
tmp.qp_num = wc->qp->qp_num;
tmp.src_qp = wc->src_qp;
tmp.wc_flags = wc->wc_flags;
@@ -1631,10 +1168,7 @@
return 0;
}
-ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_poll_cq cmd;
struct ib_uverbs_poll_cq_resp resp;
@@ -1644,15 +1178,16 @@
struct ib_wc wc;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
- header_ptr = (void __user *)(unsigned long) cmd.response;
+ header_ptr = attrs->ucore.outbuf;
data_ptr = header_ptr + sizeof resp;
memset(&resp, 0, sizeof resp);
@@ -1663,7 +1198,7 @@
if (!ret)
break;
- ret = copy_wc_to_user(data_ptr, &wc);
+ ret = copy_wc_to_user(cq->device, data_ptr, &wc);
if (ret)
goto out_put;
@@ -1675,135 +1210,98 @@
ret = -EFAULT;
goto out_put;
}
+ ret = 0;
- ret = in_len;
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT))
+ ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT);
out_put:
- put_cq_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
-ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_req_notify_cq cmd;
struct ib_cq *cq;
- int retval;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
- if (ib_req_notify_cq(cq, cmd.solicited_only ?
- IB_CQ_SOLICITED : IB_CQ_NEXT_COMP) < 0)
- retval = -ENXIO;
- else
- retval = in_len;
-
- put_cq_read(cq);
+ ib_req_notify_cq(cq, cmd.solicited_only ?
+ IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- return retval;
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ return 0;
}
-ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj;
- struct ib_cq *cq;
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- cq = uobj->object;
- ev_file = cq->cq_context;
- obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
-
- ret = ib_destroy_cq(cq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_ucq(file, ev_file, obj);
-
- memset(&resp, 0, sizeof resp);
+ obj = container_of(uobj, struct ib_ucq_object, uevent.uobject);
+ memset(&resp, 0, sizeof(resp));
resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
+ resp.async_events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
+ uobj_put_destroy(uobj);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static int create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_qp *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *udata),
- void *context)
+static int create_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_qp *cmd)
{
struct ib_uqp_object *obj;
struct ib_device *device;
struct ib_pd *pd = NULL;
struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT);
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
- char *buf;
struct ib_qp_init_attr attr = {};
struct ib_uverbs_ex_create_qp_resp resp;
int ret;
struct ib_rwq_ind_table *ind_tbl = NULL;
bool has_sq = true;
+ struct ib_device *ib_dev;
if (cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW) != 0)
return -EPERM;
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
- &qp_lock_class);
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ obj->uxrcd = NULL;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
mutex_init(&obj->mcast_lock);
- down_write(&obj->uevent.uobject.mutex);
- if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
- sizeof(cmd->rwq_ind_tbl_handle) &&
- (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
- file->ucontext);
+
+ if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) {
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd->rwq_ind_tbl_handle, attrs);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1812,12 +1310,6 @@
attr.rwq_ind_tbl = ind_tbl;
}
- if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
- sizeof(cmd->reserved1)) && cmd->reserved1) {
- ret = -EOPNOTSUPP;
- goto err_put;
- }
-
if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
ret = -EINVAL;
goto err_put;
@@ -1827,8 +1319,15 @@
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
- &xrcd_uobj);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
+ attrs);
+
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
goto err_put;
@@ -1840,9 +1339,9 @@
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = idr_read_srq(cmd->srq_handle,
- file->ucontext);
- if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
+ cmd->srq_handle, attrs);
+ if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
goto err_put;
}
@@ -1850,8 +1349,9 @@
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
+ rcq = uobj_get_obj_read(
+ cq, UVERBS_OBJECT_CQ,
+ cmd->recv_cq_handle, attrs);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1861,10 +1361,12 @@
}
if (has_sq)
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->send_cq_handle, attrs);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
+ attrs);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1874,7 +1376,6 @@
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
@@ -1890,35 +1391,36 @@
attr.cap.max_recv_sge = cmd->max_recv_sge;
attr.cap.max_inline_data = cmd->max_inline_data;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
- sizeof(cmd->create_flags))
- attr.create_flags = cmd->create_flags;
-
+ attr.create_flags = cmd->create_flags;
if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING |
+ IB_QP_CREATE_SOURCE_QPN |
+ IB_QP_CREATE_PCI_WRITE_END_PADDING)) {
ret = -EINVAL;
goto err_put;
}
- buf = (char *)cmd + sizeof(*cmd);
- if (cmd_sz > sizeof(*cmd))
- if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
- cmd_sz - sizeof(*cmd) - 1))) {
- ret = -EINVAL;
+ if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (priv_check(curthread, PRIV_NET_RAW)) {
+ ret = -EPERM;
goto err_put;
}
+ attr.source_qpn = cmd->source_qpn;
+ }
+
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = device->create_qp(pd, &attr, uhw);
+ qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
+ obj);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1926,17 +1428,6 @@
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
- qp->real_qp = qp;
- qp->device = device;
- qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->rwq_ind_tbl = ind_tbl;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
- atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
if (attr.send_cq)
atomic_inc(&attr.send_cq->usecnt);
@@ -1948,14 +1439,10 @@
atomic_inc(&ind_tbl->usecnt);
} else {
/* It is done in _ib_create_qp for other QP types */
- qp->uobject = &obj->uevent.uobject;
+ qp->uobject = obj;
}
- qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.base.qpn = qp->qp_num;
@@ -1965,11 +1452,9 @@
resp.base.max_recv_wr = attr.cap.max_recv_wr;
resp.base.max_send_wr = attr.cap.max_send_wr;
resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
-
- ret = cb(file, &resp, ucore);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
@@ -1977,87 +1462,58 @@
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (srq)
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(ind_tbl);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
return 0;
err_cb:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
- ib_destroy_qp(qp);
+ ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
err_put:
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
+ if (!IS_ERR(xrcd_uobj))
+ uobj_put_read(xrcd_uobj);
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (srq)
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ret;
}
-static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_ex_create_qp cmd_ex;
- struct ib_udata ucore;
- struct ib_udata uhw;
- ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
- int err;
-
- if (out_len < resp_size)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ int ret;
- ib_uverbs_init_udata(&ucore, buf,
- u64_to_user_ptr(cmd.response), sizeof(cmd), resp_size);
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response + resp_size),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - resp_size);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -2074,43 +1530,17 @@
cmd_ex.qp_type = cmd.qp_type;
cmd_ex.is_srq = cmd.is_srq;
- err = create_qp(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), is_srq) +
- sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
- NULL);
-
- if (err)
- return err;
-
- return in_len;
+ return create_qp(attrs, &cmd_ex);
}
-static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
+static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
-}
-
-int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
-{
- struct ib_uverbs_ex_create_qp_resp resp;
- struct ib_uverbs_ex_create_qp cmd = {0};
- int err;
-
- if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
- sizeof(cmd.comp_mask)))
- return -EINVAL;
+ struct ib_uverbs_ex_create_qp cmd;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
return -EINVAL;
@@ -2118,119 +1548,104 @@
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- err = create_qp(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_qp_cb, NULL);
-
- if (err)
- return err;
-
- return 0;
+ return create_qp(attrs, &cmd);
}
-ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_open_qp cmd;
struct ib_uverbs_create_qp_resp resp;
- struct ib_udata udata;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_qp *qp;
- struct ib_qp_open_attr attr;
+ struct ib_qp_open_attr attr = {};
int ret;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd, out_len - sizeof resp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs);
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
- goto err_put;
+ goto err_xrcd;
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
attr.qp_num = cmd.qpn;
attr.qp_type = cmd.qp_type;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
qp = ib_open_qp(xrcd, &attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_xrcd;
}
- qp->uobject = &obj->uevent.uobject;
-
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd.user_handle;
memset(&resp, 0, sizeof resp);
resp.qpn = qp->qp_num;
resp.qp_handle = obj->uevent.uobject.id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_remove;
- }
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
+ goto err_destroy;
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ qp->uobject = obj;
+ uobj_put_read(xrcd_uobj);
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
-
- return in_len;
-
-err_remove:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+ return 0;
err_destroy:
- ib_destroy_qp(qp);
-
+ ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
+err_xrcd:
+ uobj_put_read(xrcd_uobj);
err_put:
- put_xrcd_read(xrcd_uobj);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ret;
}
-ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
+ struct ib_ah_attr *rdma_attr)
+{
+ uverb_attr->dlid = rdma_attr->dlid;
+ uverb_attr->sl = rdma_attr->sl;
+ uverb_attr->src_path_bits = rdma_attr->src_path_bits;
+ uverb_attr->static_rate = rdma_attr->static_rate;
+ uverb_attr->is_global = !!(rdma_attr->ah_flags & IB_AH_GRH);
+ if (uverb_attr->is_global) {
+ const struct ib_global_route *grh = &rdma_attr->grh;
+
+ memcpy(uverb_attr->dgid, grh->dgid.raw, 16);
+ uverb_attr->flow_label = grh->flow_label;
+ uverb_attr->sgid_index = grh->sgid_index;
+ uverb_attr->hop_limit = grh->hop_limit;
+ uverb_attr->traffic_class = grh->traffic_class;
+ }
+ uverb_attr->port_num = rdma_attr->port_num;
+}
+
+static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
@@ -2239,8 +1654,9 @@
struct ib_qp_init_attr *init_attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
@@ -2249,7 +1665,7 @@
goto out;
}
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2257,7 +1673,8 @@
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- put_qp_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
goto out;
@@ -2286,29 +1703,8 @@
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
- memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
- resp.dest.flow_label = attr->ah_attr.grh.flow_label;
- resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
- resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
- resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
- resp.dest.dlid = attr->ah_attr.dlid;
- resp.dest.sl = attr->ah_attr.sl;
- resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
- resp.dest.static_rate = attr->ah_attr.static_rate;
- resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
- resp.dest.port_num = attr->ah_attr.port_num;
-
- memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
- resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
- resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
- resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
- resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
- resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
- resp.alt_dest.sl = attr->alt_ah_attr.sl;
- resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
- resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
- resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
- resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+ copy_ah_attr_to_uverbs(&resp.dest, &attr->ah_attr);
+ copy_ah_attr_to_uverbs(&resp.alt_dest, &attr->alt_ah_attr);
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
@@ -2317,15 +1713,13 @@
resp.max_inline_data = init_attr->cap.max_inline_data;
resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
kfree(attr);
kfree(init_attr);
- return ret ? ret : in_len;
+ return ret;
}
/* Remove ignored fields set in the attribute mask */
@@ -2342,193 +1736,270 @@
}
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static void copy_ah_attr_from_uverbs(struct ib_device *dev,
+ struct ib_ah_attr *rdma_attr,
+ struct ib_uverbs_qp_dest *uverb_attr)
{
- struct ib_uverbs_modify_qp cmd;
- struct ib_udata udata;
- struct ib_qp *qp;
- struct ib_qp_attr *attr;
- int ret;
+ if (uverb_attr->is_global) {
+ struct ib_global_route *grh = &rdma_attr->grh;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ grh->flow_label = uverb_attr->flow_label;
+ grh->sgid_index = uverb_attr->sgid_index;
+ grh->hop_limit = uverb_attr->hop_limit;
+ grh->traffic_class = uverb_attr->traffic_class;
+ memcpy(grh->dgid.raw, uverb_attr->dgid, sizeof(grh->dgid));
+ rdma_attr->ah_flags = IB_AH_GRH;
+ } else {
+ rdma_attr->ah_flags = 0;
+ }
+ rdma_attr->dlid = uverb_attr->dlid;
+ rdma_attr->sl = uverb_attr->sl;
+ rdma_attr->src_path_bits = uverb_attr->src_path_bits;
+ rdma_attr->static_rate = uverb_attr->static_rate;
+ rdma_attr->port_num = uverb_attr->port_num;
+}
- ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+static int modify_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_modify_qp *cmd)
+{
+ struct ib_qp_attr *attr;
+ struct ib_qp *qp;
+ int ret;
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
+ attrs);
if (!qp) {
ret = -EINVAL;
goto out;
}
- if ((cmd.attr_mask & IB_QP_PORT) &&
- !rdma_is_port_valid(qp->device, cmd.port_num)) {
+ if ((cmd->base.attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(qp->device, cmd->base.port_num)) {
ret = -EINVAL;
goto release_qp;
}
- if ((cmd.attr_mask & IB_QP_AV) &&
- !rdma_is_port_valid(qp->device, cmd.dest.port_num)) {
- ret = -EINVAL;
- goto release_qp;
+ if ((cmd->base.attr_mask & IB_QP_AV)) {
+ if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
+ if (cmd->base.attr_mask & IB_QP_STATE &&
+ cmd->base.qp_state == IB_QPS_RTR) {
+ /* We are in INIT->RTR TRANSITION (if we are not,
+ * this transition will be rejected in subsequent checks).
+ * In the INIT->RTR transition, we cannot have IB_QP_PORT set,
+ * but the IB_QP_STATE flag is required.
+ *
+ * Since kernel 3.14 (commit dbf727de7440), the uverbs driver,
+ * when IB_QP_AV is set, has required inclusion of a valid
+ * port number in the primary AV. (AVs are created and handled
+ * differently for infiniband and ethernet (RoCE) ports).
+ *
+ * Check the port number included in the primary AV against
+ * the port number in the qp struct, which was set (and saved)
+ * in the RST->INIT transition.
+ */
+ if (cmd->base.dest.port_num != qp->real_qp->port) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+ } else {
+ /* We are in SQD->SQD. (If we are not, this transition will
+ * be rejected later in the verbs layer checks).
+ * Check for both IB_QP_PORT and IB_QP_AV, these can be set
+ * together in the SQD->SQD transition.
+ *
+ * If only IP_QP_AV was set, add in IB_QP_PORT as well (the
+ * verbs layer driver does not track primary port changes
+ * resulting from path migration. Thus, in SQD, if the primary
+ * AV is modified, the primary port should also be modified).
+ *
+ * Note that in this transition, the IB_QP_STATE flag
+ * is not allowed.
+ */
+ if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+ == (IB_QP_AV | IB_QP_PORT)) &&
+ cmd->base.port_num != cmd->base.dest.port_num) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+ if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+ == IB_QP_AV) {
+ cmd->base.attr_mask |= IB_QP_PORT;
+ cmd->base.port_num = cmd->base.dest.port_num;
+ }
+ }
}
- if ((cmd.attr_mask & IB_QP_ALT_PATH) &&
- (!rdma_is_port_valid(qp->device, cmd.alt_port_num) ||
- !rdma_is_port_valid(qp->device, cmd.alt_dest.port_num))) {
+ if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
+ (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
+ !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) ||
+ cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) {
ret = -EINVAL;
goto release_qp;
}
- attr->qp_state = cmd.qp_state;
- attr->cur_qp_state = cmd.cur_qp_state;
- attr->path_mtu = cmd.path_mtu;
- attr->path_mig_state = cmd.path_mig_state;
- attr->qkey = cmd.qkey;
- attr->rq_psn = cmd.rq_psn;
- attr->sq_psn = cmd.sq_psn;
- attr->dest_qp_num = cmd.dest_qp_num;
- attr->qp_access_flags = cmd.qp_access_flags;
- attr->pkey_index = cmd.pkey_index;
- attr->alt_pkey_index = cmd.alt_pkey_index;
- attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
- attr->max_rd_atomic = cmd.max_rd_atomic;
- attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
- attr->min_rnr_timer = cmd.min_rnr_timer;
- attr->port_num = cmd.port_num;
- attr->timeout = cmd.timeout;
- attr->retry_cnt = cmd.retry_cnt;
- attr->rnr_retry = cmd.rnr_retry;
- attr->alt_port_num = cmd.alt_port_num;
- attr->alt_timeout = cmd.alt_timeout;
-
- memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
- attr->ah_attr.dlid = cmd.dest.dlid;
- attr->ah_attr.sl = cmd.dest.sl;
- attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd.dest.static_rate;
- attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
-
- if (qp->real_qp == qp) {
- if (cmd.attr_mask & IB_QP_AV) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- goto release_qp;
- }
- ret = qp->device->modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
- } else {
- ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
- }
-
- if (ret)
+ if ((cmd->base.attr_mask & IB_QP_CUR_STATE &&
+ cmd->base.cur_qp_state > IB_QPS_ERR) ||
+ (cmd->base.attr_mask & IB_QP_STATE &&
+ cmd->base.qp_state > IB_QPS_ERR)) {
+ ret = -EINVAL;
goto release_qp;
+ }
- ret = in_len;
+ if (cmd->base.attr_mask & IB_QP_STATE)
+ attr->qp_state = cmd->base.qp_state;
+ if (cmd->base.attr_mask & IB_QP_CUR_STATE)
+ attr->cur_qp_state = cmd->base.cur_qp_state;
+ if (cmd->base.attr_mask & IB_QP_PATH_MTU)
+ attr->path_mtu = cmd->base.path_mtu;
+ if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
+ attr->path_mig_state = cmd->base.path_mig_state;
+ if (cmd->base.attr_mask & IB_QP_QKEY)
+ attr->qkey = cmd->base.qkey;
+ if (cmd->base.attr_mask & IB_QP_RQ_PSN)
+ attr->rq_psn = cmd->base.rq_psn;
+ if (cmd->base.attr_mask & IB_QP_SQ_PSN)
+ attr->sq_psn = cmd->base.sq_psn;
+ if (cmd->base.attr_mask & IB_QP_DEST_QPN)
+ attr->dest_qp_num = cmd->base.dest_qp_num;
+ if (cmd->base.attr_mask & IB_QP_ACCESS_FLAGS)
+ attr->qp_access_flags = cmd->base.qp_access_flags;
+ if (cmd->base.attr_mask & IB_QP_PKEY_INDEX)
+ attr->pkey_index = cmd->base.pkey_index;
+ if (cmd->base.attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
+ attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+ if (cmd->base.attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ attr->max_rd_atomic = cmd->base.max_rd_atomic;
+ if (cmd->base.attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
+ if (cmd->base.attr_mask & IB_QP_MIN_RNR_TIMER)
+ attr->min_rnr_timer = cmd->base.min_rnr_timer;
+ if (cmd->base.attr_mask & IB_QP_PORT)
+ attr->port_num = cmd->base.port_num;
+ if (cmd->base.attr_mask & IB_QP_TIMEOUT)
+ attr->timeout = cmd->base.timeout;
+ if (cmd->base.attr_mask & IB_QP_RETRY_CNT)
+ attr->retry_cnt = cmd->base.retry_cnt;
+ if (cmd->base.attr_mask & IB_QP_RNR_RETRY)
+ attr->rnr_retry = cmd->base.rnr_retry;
+ if (cmd->base.attr_mask & IB_QP_ALT_PATH) {
+ attr->alt_port_num = cmd->base.alt_port_num;
+ attr->alt_timeout = cmd->base.alt_timeout;
+ attr->alt_pkey_index = cmd->base.alt_pkey_index;
+ }
+ if (cmd->base.attr_mask & IB_QP_RATE_LIMIT)
+ attr->rate_limit = cmd->rate_limit;
+
+ if (cmd->base.attr_mask & IB_QP_AV)
+ copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr,
+ &cmd->base.dest);
+
+ if (cmd->base.attr_mask & IB_QP_ALT_PATH)
+ copy_ah_attr_from_uverbs(qp->device, &attr->alt_ah_attr,
+ &cmd->base.alt_dest);
+
+ ret = ib_modify_qp_with_udata(qp, attr,
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask),
+ &attrs->driver_udata);
release_qp:
- put_qp_read(qp);
-
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
out:
kfree(attr);
return ret;
}
-ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_destroy_qp cmd;
- struct ib_uverbs_destroy_qp_resp resp;
- struct ib_uobject *uobj;
- struct ib_qp *qp;
- struct ib_uqp_object *obj;
- int ret = -EINVAL;
+ struct ib_uverbs_ex_modify_qp cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base));
+ if (ret)
+ return ret;
- memset(&resp, 0, sizeof resp);
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- qp = uobj->object;
- obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ return modify_qp(attrs, &cmd);
+}
- if (!list_empty(&obj->mcast_list)) {
- put_uobj_write(uobj);
- return -EBUSY;
- }
+static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_ex_modify_qp cmd;
+ struct ib_uverbs_ex_modify_qp_resp resp = {
+ .response_length = uverbs_response_length(attrs, sizeof(resp))
+ };
+ int ret;
- ret = ib_destroy_qp(qp);
- if (!ret)
- uobj->live = 0;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- put_uobj_write(uobj);
+ /*
+ * Last bit is reserved for extending the attr_mask by
+ * using another field.
+ */
+ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
+ ret = modify_qp(attrs, &cmd);
if (ret)
return ret;
- if (obj->uxrcd)
- atomic_dec(&obj->uxrcd->refcnt);
+ return uverbs_response(attrs, &resp, sizeof(resp));
+}
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
+static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_destroy_qp cmd;
+ struct ib_uverbs_destroy_qp_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_uqp_object *obj;
+ int ret;
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- ib_uverbs_release_uevent(file, &obj->uevent);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ memset(&resp, 0, sizeof(resp));
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
+ uobj_put_destroy(uobj);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge))
+ return NULL;
+
return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
-};
+}
-ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
@@ -2538,33 +2009,41 @@
struct ib_qp *qp;
int i, sg_ind;
int is_ud;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
size_t next_size;
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
- cmd.sge_count * sizeof (struct ib_uverbs_sge))
- return -EINVAL;
-
- if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
- return -EINVAL;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+ wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count);
+ if (IS_ERR(wqes))
+ return PTR_ERR(wqes);
+ sgls = uverbs_request_next_ptr(
+ &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return PTR_ERR(sgls);
+ ret = uverbs_request_finish(&iter);
+ if (ret)
+ return ret;
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
if (!user_wr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
- if (!qp)
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (!qp) {
+ ret = -EINVAL;
goto out;
+ }
is_ud = qp->qp_type == IB_QPT_UD;
sg_ind = 0;
last = NULL;
for (i = 0; i < cmd.wr_count; ++i) {
- if (copy_from_user(user_wr,
- buf + sizeof cmd + i * cmd.wqe_size,
+ if (copy_from_user(user_wr, (const u8 *)wqes + i * cmd.wqe_size,
cmd.wqe_size)) {
ret = -EFAULT;
goto out_put;
@@ -2591,7 +2070,8 @@
goto out_put;
}
- ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
+ user_wr->wr.ud.ah, attrs);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2671,11 +2151,9 @@
if (next->num_sge) {
next->sg_list = (void *)((char *)next +
ALIGN(next_size, sizeof(struct ib_sge)));
- if (copy_from_user(next->sg_list,
- (const char *)buf + sizeof cmd +
- cmd.wr_count * cmd.wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto out_put;
}
@@ -2693,16 +2171,17 @@
break;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out_put:
- put_qp_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
- put_ah_read(ud_wr(wr)->ah);
+ uobj_put_obj_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2711,28 +2190,35 @@
out:
kfree(user_wr);
- return ret ? ret : in_len;
+ return ret;
}
-static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
- int in_len,
- u32 wr_count,
- u32 sge_count,
- u32 wqe_size)
+static struct ib_recv_wr *
+ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
+ u32 wqe_size, u32 sge_count)
{
struct ib_uverbs_recv_wr *user_wr;
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
int ret;
-
- if (in_len < wqe_size * wr_count +
- sge_count * sizeof (struct ib_uverbs_sge))
- return ERR_PTR(-EINVAL);
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
+ wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count);
+ if (IS_ERR(wqes))
+ return ERR_CAST(wqes);
+ sgls = uverbs_request_next_ptr(
+ iter, sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return ERR_CAST(sgls);
+ ret = uverbs_request_finish(iter);
+ if (ret)
+ return ERR_PTR(ret);
+
user_wr = kmalloc(wqe_size, GFP_KERNEL);
if (!user_wr)
return ERR_PTR(-ENOMEM);
@@ -2740,7 +2226,7 @@
sg_ind = 0;
last = NULL;
for (i = 0; i < wr_count; ++i) {
- if (copy_from_user(user_wr, buf + i * wqe_size,
+ if (copy_from_user(user_wr, (const char *)wqes + i * wqe_size,
wqe_size)) {
ret = -EFAULT;
goto err;
@@ -2751,6 +2237,13 @@
goto err;
}
+ if (user_wr->num_sge >=
+ (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
@@ -2772,10 +2265,9 @@
if (next->num_sge) {
next->sg_list = (void *)((char *)next +
ALIGN(sizeof *next, sizeof (struct ib_sge)));
- if (copy_from_user(next->sg_list,
- (const char *)buf + wr_count * wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto err;
}
@@ -2799,47 +2291,47 @@
return ERR_PTR(ret);
}
-ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_qp *qp;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
- if (!qp)
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (!qp) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
- put_qp_read(qp);
-
- if (ret)
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
+ }
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
-
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
next = wr->next;
@@ -2847,38 +2339,39 @@
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_srq *srq;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
- if (!srq)
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (!srq) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2887,9 +2380,9 @@
break;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
@@ -2898,154 +2391,103 @@
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct ib_ah_attr attr;
+ struct ib_ah_attr attr = {};
int ret;
- struct ib_udata udata;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
+ struct ib_device *ib_dev;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
- return -EINVAL;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response + sizeof(resp)),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
- down_write(&uobj->mutex);
+ if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) {
+ ret = -EINVAL;
+ goto err;
+ }
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err;
}
- attr.dlid = cmd.attr.dlid;
- attr.sl = cmd.attr.sl;
- attr.src_path_bits = cmd.attr.src_path_bits;
- attr.static_rate = cmd.attr.static_rate;
- attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
- attr.port_num = cmd.attr.port_num;
- attr.grh.flow_label = cmd.attr.grh.flow_label;
- attr.grh.sgid_index = cmd.attr.grh.sgid_index;
- attr.grh.hop_limit = cmd.attr.grh.hop_limit;
- attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- memset(&attr.dmac, 0, sizeof(attr.dmac));
- memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
-
- ah = pd->device->create_ah(pd, &attr, &udata);
+ attr.dlid = cmd.attr.dlid;
+ attr.sl = cmd.attr.sl;
+ attr.src_path_bits = cmd.attr.src_path_bits;
+ attr.static_rate = cmd.attr.static_rate;
+ attr.port_num = cmd.attr.port_num;
+
+ if (cmd.attr.is_global) {
+ struct ib_global_route *grh = &attr.grh;
+
+ grh->flow_label = cmd.attr.grh.flow_label;
+ grh->sgid_index = cmd.attr.grh.sgid_index;
+ grh->hop_limit = cmd.attr.grh.hop_limit;
+ grh->traffic_class = cmd.attr.grh.traffic_class;
+ memcpy(grh->dgid.raw, cmd.attr.grh.dgid, sizeof(grh->dgid));
+ attr.ah_flags = IB_AH_GRH;
+ } else {
+ attr.ah_flags = 0;
+ }
+ ah = ib_create_user_ah(pd, &attr, &attrs->driver_udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
}
- ah->device = pd->device;
- ah->pd = pd;
- atomic_inc(&pd->usecnt);
ah->uobject = uobj;
+ uobj->user_handle = cmd.user_handle;
uobj->object = ah;
- ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
- if (ret)
- goto err_destroy;
-
resp.ah_handle = uobj->id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
-
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->ah_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
- up_write(&uobj->mutex);
-
- return in_len;
+ uobj_put_obj_read(pd);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
-err_destroy:
- ib_destroy_ah(ah);
+ ib_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE,
+ uverbs_get_cleared_udata(attrs));
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_ah *ah;
- struct ib_uobject *uobj;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- ah = uobj->object;
-
- ret = ib_destroy_ah(ah);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs);
}
-ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_attach_mcast cmd;
struct ib_qp *qp;
@@ -3053,14 +2495,15 @@
struct ib_uverbs_mcast_entry *mcast;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -3087,31 +2530,30 @@
out_put:
mutex_unlock(&obj->mcast_lock);
- put_qp_write(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
- int ret = -EINVAL;
+ int ret;
bool found = false;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -3132,18 +2574,151 @@
out_put:
mutex_unlock(&obj->mcast_lock);
- put_qp_write(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ return ret;
+}
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+{
+ struct ib_uflow_resources *resources;
+
+ resources = kzalloc(sizeof(*resources), GFP_KERNEL);
+
+ if (!resources)
+ return NULL;
+
+ if (!num_specs)
+ goto out;
+
+ resources->counters =
+ kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
+ resources->collection =
+ kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
+
+ if (!resources->counters || !resources->collection)
+ goto err;
+
+out:
+ resources->max = num_specs;
+ return resources;
+
+err:
+ kfree(resources->counters);
+ kfree(resources);
+
+ return NULL;
+}
+EXPORT_SYMBOL(flow_resources_alloc);
+
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
+{
+ unsigned int i;
+
+ if (!uflow_res)
+ return;
+
+ for (i = 0; i < uflow_res->collection_num; i++)
+ atomic_dec(&uflow_res->collection[i]->usecnt);
+
+ for (i = 0; i < uflow_res->counters_num; i++)
+ atomic_dec(&uflow_res->counters[i]->usecnt);
- return ret ? ret : in_len;
+ kfree(uflow_res->collection);
+ kfree(uflow_res->counters);
+ kfree(uflow_res);
}
+EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
-static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ enum ib_flow_spec_type type,
+ void *ibobj)
{
- /* Returns user space filter size, includes padding */
- return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
+ WARN_ON(uflow_res->num >= uflow_res->max);
+
+ switch (type) {
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
+ uflow_res->collection[uflow_res->collection_num++] =
+ (struct ib_flow_action *)ibobj;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
+ uflow_res->counters[uflow_res->counters_num++] =
+ (struct ib_counters *)ibobj;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ uflow_res->num++;
+}
+EXPORT_SYMBOL(flow_resources_add);
+
+static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (kern_spec->drop.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_drop))
+ return -EINVAL;
+
+ ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
+ break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ if (kern_spec->action.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_handle))
+ return -EOPNOTSUPP;
+ ib_spec->action.act = uobj_get_obj_read(flow_action,
+ UVERBS_OBJECT_FLOW_ACTION,
+ kern_spec->action.handle,
+ attrs);
+ if (!ib_spec->action.act)
+ return -EINVAL;
+ ib_spec->action.size =
+ sizeof(struct ib_flow_spec_action_handle);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_HANDLE,
+ ib_spec->action.act);
+ uobj_put_obj_read(ib_spec->action.act);
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (kern_spec->flow_count.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_count))
+ return -EINVAL;
+ ib_spec->flow_count.counters =
+ uobj_get_obj_read(counters,
+ UVERBS_OBJECT_COUNTERS,
+ kern_spec->flow_count.handle,
+ attrs);
+ if (!ib_spec->flow_count.counters)
+ return -EINVAL;
+ ib_spec->flow_count.size =
+ sizeof(struct ib_flow_spec_action_count);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_COUNT,
+ ib_spec->flow_count.counters);
+ uobj_put_obj_read(ib_spec->flow_count.counters);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
}
-static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
+static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
/*
@@ -3152,7 +2727,7 @@
*/
if (kern_filter_size > ib_real_filter_sz) {
- if (memchr_inv((char *)kern_spec_filter +
+ if (memchr_inv((const char *)kern_spec_filter +
ib_real_filter_sz, 0,
kern_filter_size - ib_real_filter_sz))
return -EINVAL;
@@ -3161,30 +2736,25 @@
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
- ssize_t kern_filter_sz;
ssize_t ib_filter_sz;
- void *kern_spec_mask;
- void *kern_spec_val;
-
- if (kern_spec->reserved)
- return -EINVAL;
-
- ib_spec->type = kern_spec->type;
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
/* User flow spec size must be aligned to 4 bytes */
if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
return -EINVAL;
- kern_spec_val = (char *)kern_spec +
- sizeof(struct ib_uverbs_flow_spec_hdr);
- kern_spec_mask = (char *)kern_spec_val + kern_filter_sz;
+ ib_spec->type = type;
- switch (ib_spec->type) {
+ if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
+ return -EINVAL;
+
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
actual_filter_sz = spec_filter_size(kern_spec_mask,
@@ -3222,17 +2792,65 @@
(ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20))
return -EINVAL;
break;
- case IB_FLOW_SPEC_TCP:
- case IB_FLOW_SPEC_UDP:
- ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz);
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp);
+ memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
+ break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel);
+ memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz);
+
+ if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) ||
+ (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
+ return -EINVAL;
+ break;
+ case IB_FLOW_SPEC_ESP:
+ ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->esp.size = sizeof(struct ib_flow_spec_esp);
+ memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
+ break;
+ case IB_FLOW_SPEC_GRE:
+ ib_filter_sz = offsetof(struct ib_flow_gre_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->gre.size = sizeof(struct ib_flow_spec_gre);
+ memcpy(&ib_spec->gre.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz);
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ ib_filter_sz = offsetof(struct ib_flow_mpls_filter, real_sz);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
if (actual_filter_sz <= 0)
return -EINVAL;
- ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp);
- memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
- memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
+ ib_spec->mpls.size = sizeof(struct ib_flow_spec_mpls);
+ memcpy(&ib_spec->mpls.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->mpls.mask, kern_spec_mask, actual_filter_sz);
break;
default:
return -EINVAL;
@@ -3240,12 +2858,46 @@
return 0;
}
-int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ size_t kern_filter_sz;
+ void *kern_spec_mask;
+ void *kern_spec_val;
+
+ if (kern_spec->hdr.size < sizeof(struct ib_uverbs_flow_spec_hdr))
+ return -EINVAL;
+ kern_filter_sz = kern_spec->hdr.size - sizeof(struct ib_uverbs_flow_spec_hdr);
+ kern_filter_sz /= 2;
+
+ kern_spec_val = (u8 *)kern_spec +
+ sizeof(struct ib_uverbs_flow_spec_hdr);
+ kern_spec_mask = (u8 *)kern_spec_val + kern_filter_sz;
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type,
+ kern_spec_mask,
+ kern_spec_val,
+ kern_filter_sz, ib_spec);
+}
+
+static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec,
+ uflow_res);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
+static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_wq cmd = {};
+ struct ib_uverbs_ex_create_wq cmd;
struct ib_uverbs_ex_create_wq_resp resp = {};
struct ib_uwq_object *obj;
int err = 0;
@@ -3253,44 +2905,27 @@
struct ib_pd *pd;
struct ib_wq *wq;
struct ib_wq_init_attr wq_init_attr = {};
- size_t required_cmd_sz;
- size_t required_resp_len;
-
- required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
- required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
+ struct ib_device *ib_dev;
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
- &wq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3299,18 +2934,19 @@
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = file;
+ wq_init_attr.wq_context = attrs->ufile;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
- obj->uevent.events_reported = 0;
+ wq_init_attr.create_flags = cmd.create_flags;
INIT_LIST_HEAD(&obj->uevent.event_list);
- wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+
+ wq = pd->device->create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
goto err_put_cq;
}
- wq->uobject = &obj->uevent.uobject;
+ wq->uobject = obj;
obj->uevent.uobject.object = wq;
wq->wq_type = wq_init_attr.wq_type;
wq->cq = cq;
@@ -3320,166 +2956,106 @@
atomic_set(&wq->usecnt, 0);
atomic_inc(&pd->usecnt);
atomic_inc(&cq->usecnt);
- wq->uobject = &obj->uevent.uobject;
+ wq->uobject = obj;
obj->uevent.uobject.object = wq;
- err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
- if (err)
- goto destroy_wq;
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
- resp.response_length = required_resp_len;
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
- put_pd_read(pd);
- put_cq_read(cq);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
- ib_destroy_wq(wq);
+ ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs));
err_put_cq:
- put_cq_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_pd:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_uobj:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return err;
}
-int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_wq cmd = {};
+ struct ib_uverbs_ex_destroy_wq cmd;
struct ib_uverbs_ex_destroy_wq_resp resp = {};
- struct ib_wq *wq;
struct ib_uobject *uobj;
struct ib_uwq_object *obj;
- size_t required_cmd_sz;
- size_t required_resp_len;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
- required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- resp.response_length = required_resp_len;
- uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- wq = uobj->object;
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- ret = ib_destroy_wq(wq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
- ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
- if (ret)
- return ret;
+ uobj_put_destroy(uobj);
- return 0;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_wq cmd = {};
+ struct ib_uverbs_ex_modify_wq cmd;
struct ib_wq *wq;
struct ib_wq_attr wq_attr = {};
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
if (!wq)
return -EINVAL;
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
- ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
- put_wq_read(wq);
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
+ ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask,
+ &attrs->driver_udata);
+ rdma_lookup_put_uobject(&wq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
-int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
+ struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
struct ib_uobject *uobj;
- int err = 0;
+ int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_wq **wqs = NULL;
@@ -3487,26 +3063,13 @@
struct ib_wq *wq = NULL;
int i, j, num_read_wqs;
u32 num_wq_handles;
- u32 expected_in_size;
- size_t required_cmd_sz_header;
- size_t required_resp_len;
-
- required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
- required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
-
- if (ucore->inlen < required_cmd_sz_header)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
+ struct uverbs_req_iter iter;
+ struct ib_device *ib_dev;
- err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf = (const char *)ucore->inbuf + required_cmd_sz_header;
- ucore->inlen -= required_cmd_sz_header;
-
if (cmd.comp_mask)
return -EOPNOTSUPP;
@@ -3514,26 +3077,17 @@
return -EINVAL;
num_wq_handles = 1 << cmd.log_ind_tbl_size;
- expected_in_size = num_wq_handles * sizeof(__u32);
- if (num_wq_handles == 1)
- /* input size for wq handles is u64 aligned */
- expected_in_size += sizeof(__u32);
-
- if (ucore->inlen < expected_in_size)
- return -EINVAL;
-
- if (ucore->inlen > expected_in_size &&
- !ib_is_udata_cleared(ucore, expected_in_size,
- ucore->inlen - expected_in_size))
- return -EOPNOTSUPP;
-
wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
GFP_KERNEL);
if (!wqs_handles)
return -ENOMEM;
- err = ib_copy_from_udata(wqs_handles, ucore,
- num_wq_handles * sizeof(__u32));
+ err = uverbs_request_next(&iter, wqs_handles,
+ num_wq_handles * sizeof(__u32));
+ if (err)
+ goto err_free;
+
+ err = uverbs_request_finish(&iter);
if (err)
goto err_free;
@@ -3545,7 +3099,8 @@
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
+ wqs_handles[num_read_wqs], attrs);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3554,17 +3109,17 @@
wqs[num_read_wqs] = wq;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto put_wqs;
}
- init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
- down_write(&uobj->mutex);
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
- rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
+
+ rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr,
+ &attrs->driver_udata);
if (IS_ERR(rwq_ind_tbl)) {
err = PTR_ERR(rwq_ind_tbl);
@@ -3581,108 +3136,54 @@
for (i = 0; i < num_wq_handles; i++)
atomic_inc(&wqs[i]->usecnt);
- err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- if (err)
- goto destroy_ind_tbl;
-
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
- resp.response_length = required_resp_len;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ rdma_alloc_commit_uobject(uobj, attrs);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-destroy_ind_tbl:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
put_wqs:
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_free:
kfree(wqs_handles);
kfree(wqs);
return err;
}
-int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_rwq_ind_table *rwq_ind_tbl;
- struct ib_uobject *uobj;
- int ret;
- struct ib_wq **ind_tbl;
- size_t required_cmd_sz;
-
- required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
+ struct ib_uverbs_ex_destroy_rwq_ind_table cmd;
+ int ret;
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- rwq_ind_tbl = uobj->object;
- ind_tbl = rwq_ind_tbl->ind_tbl;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- kfree(ind_tbl);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd.ind_tbl_handle, attrs);
}
-int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
@@ -3691,24 +3192,18 @@
struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
- int err = 0;
- void *kern_spec;
+ struct ib_uflow_resources *uflow_res;
+ struct ib_uverbs_flow_spec_hdr *kern_spec;
+ struct uverbs_req_iter iter;
+ int err;
void *ib_spec;
int i;
+ struct ib_device *ib_dev;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- if (ucore->outlen < sizeof(resp))
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf = (const char *)ucore->inbuf + sizeof(cmd);
- ucore->inlen -= sizeof(cmd);
-
if (cmd.comp_mask)
return -EINVAL;
@@ -3726,8 +3221,7 @@
if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
- if (cmd.flow_attr.size > ucore->inlen ||
- cmd.flow_attr.size >
+ if (cmd.flow_attr.size >
(cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
return -EINVAL;
@@ -3741,35 +3235,47 @@
if (!kern_flow_attr)
return -ENOMEM;
- memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
- err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
- cmd.flow_attr.size);
+ *kern_flow_attr = cmd.flow_attr;
+ err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs,
+ cmd.flow_attr.size);
if (err)
goto err_free_attr;
} else {
kern_flow_attr = &cmd.flow_attr;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ err = uverbs_request_finish(&iter);
+ if (err)
+ goto err_free_attr;
+
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto err_free_attr;
}
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
- down_write(&uobj->mutex);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
err = -EINVAL;
goto err_uobj;
}
- flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs *
- sizeof(union ib_flow_spec), GFP_KERNEL);
+ if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) {
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ flow_attr = kzalloc(struct_size(flow_attr, flows,
+ cmd.flow_attr.num_of_specs), GFP_KERNEL);
if (!flow_attr) {
err = -ENOMEM;
goto err_put;
}
+ uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs);
+ if (!uflow_res) {
+ err = -ENOMEM;
+ goto err_free_flow_attr;
+ }
flow_attr->type = kern_flow_attr->type;
flow_attr->priority = kern_flow_attr->priority;
@@ -3778,20 +3284,23 @@
flow_attr->flags = kern_flow_attr->flags;
flow_attr->size = sizeof(*flow_attr);
- kern_spec = kern_flow_attr + 1;
+ kern_spec = kern_flow_attr->flow_specs;
ib_spec = flow_attr + 1;
for (i = 0; i < flow_attr->num_of_specs &&
- cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
- cmd.flow_attr.size >=
- ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ cmd.flow_attr.size >= sizeof(*kern_spec) &&
+ cmd.flow_attr.size >= kern_spec->size;
+ i++) {
+ err = kern_spec_to_ib_spec(
+ attrs, (struct ib_uverbs_flow_spec *)kern_spec,
+ ib_spec, uflow_res);
if (err)
goto err_free;
+
flow_attr->size +=
((union ib_flow_spec *) ib_spec)->size;
- cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
- kern_spec = (char *)kern_spec + ((struct ib_uverbs_flow_spec *) kern_spec)->size;
- ib_spec = (char *)ib_spec + ((union ib_flow_spec *)ib_spec)->size;
+ cmd.flow_attr.size -= kern_spec->size;
+ kern_spec = (struct ib_uverbs_flow_spec_hdr *)((u8 *)kern_spec + kern_spec->size);
+ ib_spec = (u8 *)ib_spec + ((union ib_flow_spec *) ib_spec)->size;
}
if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
@@ -3799,100 +3308,65 @@
err = -EINVAL;
goto err_free;
}
- flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+
+ flow_id = qp->device->create_flow(
+ qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
+
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
goto err_free;
}
- flow_id->qp = qp;
- flow_id->uobject = uobj;
- uobj->object = flow_id;
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
- if (err)
- goto destroy_flow;
+ ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
- err = ib_copy_to_udata(ucore,
- &resp, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
+ rdma_alloc_commit_uobject(uobj, attrs);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-destroy_flow:
- ib_destroy_flow(flow_id);
+ if (!qp->device->destroy_flow(flow_id))
+ atomic_dec(&qp->usecnt);
err_free:
+ ib_uverbs_flow_resources_free(uflow_res);
+err_free_flow_attr:
kfree(flow_attr);
err_put:
- put_qp_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
err_free_attr:
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
return err;
}
-int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_flow *flow_id;
- struct ib_uobject *uobj;
int ret;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- flow_id = uobj->object;
-
- ret = ib_destroy_flow(flow_id);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs);
}
-static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
+static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
@@ -3903,64 +3377,82 @@
struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_srq_init_attr attr;
int ret;
+ struct ib_device *ib_dev;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ if (cmd->srq_type == IB_SRQT_TM)
+ attr.ext.tag_matching.max_num_tags = cmd->max_num_tags;
if (cmd->srq_type == IB_SRQT_XRC) {
- attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!attr.ext.xrc.xrcd) {
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
+ attrs);
+ if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
}
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
+ }
- attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
- if (!attr.ext.xrc.cq) {
+ if (ib_srq_has_cq(cmd->srq_type)) {
+ attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->cq_handle, attrs);
+ if (!attr.ext.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = file;
+ attr.srq_context = attrs->ufile;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
attr.attr.srq_limit = cmd->srq_limit;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- srq = pd->device->create_srq(pd, &attr, udata);
- if (IS_ERR(srq)) {
- ret = PTR_ERR(srq);
+ srq = rdma_zalloc_drv_obj(ib_dev, ib_srq);
+ if (!srq) {
+ ret = -ENOMEM;
goto err_put;
}
srq->device = pd->device;
srq->pd = pd;
srq->srq_type = cmd->srq_type;
- srq->uobject = &obj->uevent.uobject;
+ srq->uobject = obj;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
+ ret = pd->device->create_srq(srq, &attr, udata);
+ if (ret)
+ goto err_free;
+
+ if (ib_srq_has_cq(cmd->srq_type)) {
+ srq->ext.cq = attr.ext.cq;
+ atomic_inc(&attr.ext.cq->usecnt);
+ }
+
if (cmd->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.cq = attr.ext.xrc.cq;
srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
- atomic_inc(&attr.ext.xrc.cq->usecnt);
atomic_inc(&attr.ext.xrc.xrcd->usecnt);
}
@@ -3968,9 +3460,7 @@
atomic_set(&srq->usecnt, 0);
obj->uevent.uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uevent.uobject.id;
@@ -3979,69 +3469,57 @@
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user((void __user *) (unsigned long) cmd->response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
-
- if (cmd->srq_type == IB_SRQT_XRC) {
- put_uobj_read(xrcd_uobj);
- put_cq_read(attr.ext.xrc.cq);
- }
- put_pd_read(pd);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
+ if (cmd->srq_type == IB_SRQT_XRC)
+ uobj_put_read(xrcd_uobj);
- up_write(&obj->uevent.uobject.mutex);
+ if (ib_srq_has_cq(cmd->srq_type))
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ uobj_put_obj_read(pd);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
-
-err_destroy:
- ib_destroy_srq(srq);
-
+ ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs));
+ /* It was released in ib_destroy_srq_user */
+ srq = NULL;
+err_free:
+ kfree(srq);
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_put_cq:
- if (cmd->srq_type == IB_SRQT_XRC)
- put_cq_read(attr.ext.xrc.cq);
+ if (ib_srq_has_cq(cmd->srq_type))
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
atomic_dec(&obj->uxrcd->refcnt);
- put_uobj_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
err:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ret;
}
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_xsrq xcmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+ memset(&xcmd, 0, sizeof(xcmd));
xcmd.response = cmd.response;
xcmd.user_handle = cmd.user_handle;
xcmd.srq_type = IB_SRQT_BASIC;
@@ -4050,80 +3528,49 @@
xcmd.max_sge = cmd.max_sge;
xcmd.srq_limit = cmd.srq_limit;
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof resp);
-
- ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_xsrq cmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd,
- u64_to_user_ptr(cmd.response + sizeof resp),
- in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof resp);
-
- ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- return in_len;
+ return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_modify_srq cmd;
- struct ib_udata udata;
struct ib_srq *srq;
struct ib_srq_attr attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
- ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
+ ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask,
+ &attrs->driver_udata);
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_srq cmd;
struct ib_uverbs_query_srq_resp resp;
@@ -4131,19 +3578,18 @@
struct ib_srq *srq;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
ret = ib_query_srq(srq, &attr);
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
return ret;
@@ -4154,85 +3600,49 @@
resp.max_sge = attr.max_sge;
resp.srq_limit = attr.srq_limit;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
- struct ib_srq *srq;
struct ib_uevent_object *obj;
- int ret = -EINVAL;
- struct ib_usrq_object *us;
- enum ib_srq_type srq_type;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- srq = uobj->object;
- obj = container_of(uobj, struct ib_uevent_object, uobject);
- srq_type = srq->srq_type;
-
- ret = ib_destroy_srq(srq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- if (srq_type == IB_SRQT_XRC) {
- us = container_of(obj, struct ib_usrq_object, uevent);
- atomic_dec(&us->uxrcd->refcnt);
- }
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, obj);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- memset(&resp, 0, sizeof resp);
+ obj = container_of(uobj, struct ib_uevent_object, uobject);
+ memset(&resp, 0, sizeof(resp));
resp.events_reported = obj->events_reported;
- put_uobj(uobj);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ uobj_put_destroy(uobj);
- return ret ? ret : in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_query_device_resp resp = { {0} };
+ struct ib_uverbs_ex_query_device_resp resp = {};
struct ib_uverbs_ex_query_device cmd;
struct ib_device_attr attr = {0};
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
int err;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
@@ -4242,21 +3652,12 @@
if (cmd.reserved)
return -EINVAL;
- resp.response_length = offsetof(typeof(resp), odp_caps);
-
- if (ucore->outlen < resp.response_length)
- return -ENOSPC;
-
- err = ib_dev->query_device(ib_dev, &attr, uhw);
+ err = ib_dev->query_device(ib_dev, &attr, &attrs->driver_udata);
if (err)
return err;
- copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
- goto end;
+ copy_query_dev_fields(ucontext, &resp.base, &attr);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
resp.odp_caps.general_caps = attr.odp_caps.general_caps;
resp.odp_caps.per_transport_caps.rc_odp_caps =
attr.odp_caps.per_transport_caps.rc_odp_caps;
@@ -4264,44 +3665,431 @@
attr.odp_caps.per_transport_caps.uc_odp_caps;
resp.odp_caps.per_transport_caps.ud_odp_caps =
attr.odp_caps.per_transport_caps.ud_odp_caps;
-#endif
- resp.response_length += sizeof(resp.odp_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
- goto end;
+ resp.xrc_odp_caps = attr.odp_caps.per_transport_caps.xrc_odp_caps;
resp.timestamp_mask = attr.timestamp_mask;
- resp.response_length += sizeof(resp.timestamp_mask);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
- goto end;
-
resp.hca_core_clock = attr.hca_core_clock;
- resp.response_length += sizeof(resp.hca_core_clock);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
- goto end;
-
resp.device_cap_flags_ex = attr.device_cap_flags;
- resp.response_length += sizeof(resp.device_cap_flags_ex);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
- goto end;
-
resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
resp.rss_caps.max_rwq_indirection_tables =
attr.rss_caps.max_rwq_indirection_tables;
resp.rss_caps.max_rwq_indirection_table_size =
attr.rss_caps.max_rwq_indirection_table_size;
+ resp.max_wq_type_rq = attr.max_wq_type_rq;
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
+ resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
+ resp.tm_caps.max_ops = attr.tm_caps.max_ops;
+ resp.tm_caps.max_sge = attr.tm_caps.max_sge;
+ resp.tm_caps.flags = attr.tm_caps.flags;
+ resp.cq_moderation_caps.max_cq_moderation_count =
+ attr.cq_caps.max_cq_moderation_count;
+ resp.cq_moderation_caps.max_cq_moderation_period =
+ attr.cq_caps.max_cq_moderation_period;
+ resp.max_dm_size = attr.max_dm_size;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+
+ return uverbs_response(attrs, &resp, sizeof(resp));
+}
- resp.response_length += sizeof(resp.rss_caps);
+static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_ex_modify_cq cmd;
+ struct ib_cq *cq;
+ int ret;
- if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
- goto end;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- resp.max_wq_type_rq = attr.max_wq_type_rq;
- resp.response_length += sizeof(resp.max_wq_type_rq);
-end:
- err = ib_copy_to_udata(ucore, &resp, resp.response_length);
- return err;
+ if (!cmd.attr_mask || cmd.reserved)
+ return -EINVAL;
+
+ if (cmd.attr_mask > IB_CQ_MODERATE)
+ return -EOPNOTSUPP;
+
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (!cq)
+ return -EINVAL;
+
+ ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period);
+
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ return ret;
}
+
+/*
+ * Describe the input structs for write(). Some write methods have an input
+ * only struct, most have an input and output. If the struct has an output then
+ * the 'response' u64 must be the first field in the request structure.
+ *
+ * If udata is present then both the request and response structs have a
+ * trailing driver_data flex array. In this case the size of the base struct
+ * cannot be changed.
+ */
+#define UAPI_DEF_WRITE_IO(req, resp) \
+ .write.has_resp = 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
+ BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \
+ sizeof(u64)), \
+ .write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
+
+#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req)
+
+#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \
+ UAPI_DEF_WRITE_IO(req, resp), \
+ .write.has_udata = \
+ 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req)) + \
+ BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \
+ sizeof(resp))
+
+#define UAPI_DEF_WRITE_UDATA_I(req) \
+ UAPI_DEF_WRITE_I(req), \
+ .write.has_udata = \
+ 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req))
+
+/*
+ * The _EX versions are for use with WRITE_EX and allow the last struct member
+ * to be specified. Buffers that do not include that member will be rejected.
+ */
+#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \
+ .write.has_resp = 1, \
+ .write.req_size = offsetofend(req, req_last_member), \
+ .write.resp_size = offsetofend(resp, resp_last_member)
+
+#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \
+ .write.req_size = offsetofend(req, req_last_member)
+
+const struct uapi_definition uverbs_def_write_intf[] = {
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_AH,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH,
+ ib_uverbs_create_ah,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_ah,
+ struct ib_uverbs_create_ah_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ ib_uverbs_destroy_ah,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+ ib_uverbs_create_comp_channel,
+ UAPI_DEF_WRITE_IO(
+ struct ib_uverbs_create_comp_channel,
+ struct ib_uverbs_create_comp_channel_resp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_CQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ,
+ ib_uverbs_create_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_cq,
+ struct ib_uverbs_create_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ ib_uverbs_destroy_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq,
+ struct ib_uverbs_destroy_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POLL_CQ,
+ ib_uverbs_poll_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq,
+ struct ib_uverbs_poll_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(poll_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ ib_uverbs_req_notify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ,
+ ib_uverbs_resize_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_resize_cq,
+ struct ib_uverbs_resize_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(resize_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ ib_uverbs_ex_create_cq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq,
+ reserved,
+ struct ib_uverbs_ex_create_cq_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_CQ,
+ ib_uverbs_ex_modify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_DEVICE,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT,
+ ib_uverbs_get_context,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_get_context,
+ struct ib_uverbs_get_context_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ ib_uverbs_query_device,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device,
+ struct ib_uverbs_query_device_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ ib_uverbs_query_port,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port,
+ struct ib_uverbs_query_port_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_port)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+ ib_uverbs_ex_query_device,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_query_device,
+ reserved,
+ struct ib_uverbs_ex_query_device_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(query_device)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW,
+ ib_uverbs_ex_create_flow,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow,
+ flow_attr,
+ struct ib_uverbs_create_flow_resp,
+ flow_handle),
+ UAPI_DEF_METHOD_NEEDS_FN(create_flow)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ ib_uverbs_ex_destroy_flow,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MR,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR,
+ ib_uverbs_dereg_mr,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr),
+ UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REG_MR,
+ ib_uverbs_reg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr,
+ struct ib_uverbs_reg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REREG_MR,
+ ib_uverbs_rereg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
+ struct ib_uverbs_rereg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MW,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ ib_uverbs_alloc_mw,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw,
+ struct ib_uverbs_alloc_mw_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ ib_uverbs_dealloc_mw,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_PD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ ib_uverbs_alloc_pd,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd,
+ struct ib_uverbs_alloc_pd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ ib_uverbs_dealloc_pd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_QP,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ ib_uverbs_attach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP,
+ ib_uverbs_create_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_qp,
+ struct ib_uverbs_create_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ ib_uverbs_destroy_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp,
+ struct ib_uverbs_destroy_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DETACH_MCAST,
+ ib_uverbs_detach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ ib_uverbs_modify_qp,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_RECV,
+ ib_uverbs_post_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv,
+ struct ib_uverbs_post_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SEND,
+ ib_uverbs_post_send,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send,
+ struct ib_uverbs_post_send_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_send)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_QP,
+ ib_uverbs_query_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp,
+ struct ib_uverbs_query_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_QP,
+ ib_uverbs_ex_create_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp,
+ comp_mask,
+ struct ib_uverbs_ex_create_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ ib_uverbs_ex_modify_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp,
+ base,
+ struct ib_uverbs_ex_modify_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ ib_uverbs_ex_create_rwq_ind_table,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_create_rwq_ind_table,
+ log_ind_tbl_size,
+ struct ib_uverbs_ex_create_rwq_ind_table_resp,
+ ind_tbl_num),
+ UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+ ib_uverbs_ex_destroy_rwq_ind_table,
+ UAPI_DEF_WRITE_I(
+ struct ib_uverbs_ex_destroy_rwq_ind_table),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_WQ,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ ib_uverbs_ex_create_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq,
+ max_sge,
+ struct ib_uverbs_ex_create_wq_resp,
+ wqn),
+ UAPI_DEF_METHOD_NEEDS_FN(create_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+ ib_uverbs_ex_destroy_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq,
+ wq_handle,
+ struct ib_uverbs_ex_destroy_wq_resp,
+ reserved),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ ib_uverbs_ex_modify_wq,
+ UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq,
+ curr_wq_state),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_wq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ,
+ ib_uverbs_create_srq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_srq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ,
+ ib_uverbs_create_xsrq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_xsrq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_SRQ,
+ ib_uverbs_destroy_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq,
+ struct ib_uverbs_destroy_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_SRQ,
+ ib_uverbs_modify_srq,
+ UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ ib_uverbs_post_srq_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv,
+ struct ib_uverbs_post_srq_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_SRQ,
+ ib_uverbs_query_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq,
+ struct ib_uverbs_query_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_srq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ ib_uverbs_close_xrcd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
+ ib_uverbs_open_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_qp,
+ struct ib_uverbs_create_qp_resp)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD,
+ ib_uverbs_open_xrcd,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_xrcd,
+ struct ib_uverbs_open_xrcd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+
+ {},
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_ioctl.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_ioctl.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_ioctl.c
@@ -0,0 +1,767 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/radix-tree.h>
+#include <linux/file.h>
+#include <linux/overflow.h>
+
+#include <rdma/rdma_user_ioctl.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+struct bundle_alloc_head {
+ struct bundle_alloc_head *next;
+ uint8_t data[0];
+};
+
+struct bundle_priv {
+ /* Must be first */
+ struct bundle_alloc_head alloc_head;
+ struct bundle_alloc_head *allocated_mem;
+ size_t internal_avail;
+ size_t internal_used;
+
+ struct radix_tree_root *radix;
+ const struct uverbs_api_ioctl_method *method_elm;
+ u32 method_key;
+
+ struct ib_uverbs_attr __user *user_attrs;
+ struct ib_uverbs_attr *uattrs;
+
+ DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
+ DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
+
+ /*
+ * Must be last. bundle ends in a flex array which overlaps
+ * internal_buffer.
+ */
+ struct uverbs_attr_bundle bundle;
+ u64 internal_buffer[32];
+};
+
+/*
+ * Each method has an absolute minimum amount of memory it needs to allocate,
+ * precompute that amount and determine if the onstack memory can be used or
+ * if allocation is need.
+ */
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+ unsigned int num_attrs)
+{
+ struct bundle_priv *pbundle;
+ size_t bundle_size =
+ offsetof(struct bundle_priv, internal_buffer) +
+ sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len +
+ sizeof(*pbundle->uattrs) * num_attrs;
+
+ method_elm->use_stack = bundle_size <= sizeof(*pbundle);
+ method_elm->bundle_size =
+ ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer));
+
+ /* Do not want order-2 allocations for this. */
+ WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE);
+}
+
+/**
+ * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * @bundle: The bundle
+ * @size: Number of bytes to allocate
+ * @flags: Allocator flags
+ *
+ * The bundle allocator is intended for allocations that are connected with
+ * processing the system call related to the bundle. The allocated memory is
+ * always freed once the system call completes, and cannot be freed any other
+ * way.
+ *
+ * This tries to use a small pool of pre-allocated memory for performance.
+ */
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+ gfp_t flags)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+ size_t new_used;
+ void *res;
+
+ new_used = size + pbundle->internal_used;
+ if (new_used < size)
+ return ERR_PTR(-EOVERFLOW);
+
+ if (new_used > pbundle->internal_avail) {
+ struct bundle_alloc_head *buf;
+
+ buf = kvmalloc(struct_size(buf, data, size), flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+ buf->next = pbundle->allocated_mem;
+ pbundle->allocated_mem = buf;
+ return buf->data;
+ }
+
+ res = (u8 *)pbundle->internal_buffer + pbundle->internal_used;
+ pbundle->internal_used =
+ ALIGN(new_used, sizeof(*pbundle->internal_buffer));
+ if (flags & __GFP_ZERO)
+ memset(res, 0, size);
+ return res;
+}
+EXPORT_SYMBOL(_uverbs_alloc);
+
+static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
+ u16 len)
+{
+ if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+ return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data + len),
+ uattr->len - len);
+
+ return !memchr_inv((const u8 *)&uattr->data + len,
+ 0, uattr->len - len);
+}
+
+static int uverbs_set_output(const struct uverbs_attr_bundle *bundle,
+ const struct uverbs_attr *attr)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+ u16 flags;
+
+ flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
+ UVERBS_ATTR_F_VALID_OUTPUT;
+ if (put_user(flags,
+ &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags))
+ return -EFAULT;
+ return 0;
+}
+
+static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
+ const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ struct ib_uverbs_attr *uattr,
+ u32 attr_bkey)
+{
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ size_t array_len;
+ u32 *idr_vals;
+ int ret = 0;
+ size_t i;
+
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
+ if (uattr->len % sizeof(u32))
+ return -EINVAL;
+
+ array_len = uattr->len / sizeof(u32);
+ if (array_len < spec->u2.objs_arr.min_len ||
+ array_len > spec->u2.objs_arr.max_len)
+ return -EINVAL;
+
+ attr->uobjects =
+ uverbs_alloc(&pbundle->bundle,
+ array_size(array_len, sizeof(*attr->uobjects)));
+ if (IS_ERR(attr->uobjects))
+ return PTR_ERR(attr->uobjects);
+
+ /*
+ * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
+ * to store idrs array and avoid additional memory allocation. The
+ * idrs array is offset to the end of the uobjects array so we will be
+ * able to read idr and replace with a pointer.
+ */
+ idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
+
+ if (uattr->len > sizeof(uattr->data)) {
+ ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
+ uattr->len);
+ if (ret)
+ return -EFAULT;
+ } else {
+ memcpy(idr_vals, &uattr->data, uattr->len);
+ }
+
+ for (i = 0; i != array_len; i++) {
+ attr->uobjects[i] = uverbs_get_uobject_from_file(
+ spec->u2.objs_arr.obj_type, spec->u2.objs_arr.access,
+ idr_vals[i], &pbundle->bundle);
+ if (IS_ERR(attr->uobjects[i])) {
+ ret = PTR_ERR(attr->uobjects[i]);
+ break;
+ }
+ }
+
+ attr->len = i;
+ __set_bit(attr_bkey, pbundle->spec_finalize);
+ return ret;
+}
+
+static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ bool commit,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ size_t i;
+
+ for (i = 0; i != attr->len; i++)
+ uverbs_finalize_object(attr->uobjects[i],
+ spec->u2.objs_arr.access, commit, attrs);
+}
+
+static int uverbs_process_attr(struct bundle_priv *pbundle,
+ const struct uverbs_api_attr *attr_uapi,
+ struct ib_uverbs_attr *uattr, u32 attr_bkey)
+{
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey];
+ const struct uverbs_attr_spec *val_spec = spec;
+ struct uverbs_obj_attr *o_attr;
+
+ switch (spec->type) {
+ case UVERBS_ATTR_TYPE_ENUM_IN:
+ if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
+ return -EOPNOTSUPP;
+
+ if (uattr->attr_data.enum_data.reserved)
+ return -EINVAL;
+
+ val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
+
+ /* Currently we only support PTR_IN based enums */
+ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
+ return -EOPNOTSUPP;
+
+ e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
+ /* fall through */
+ case UVERBS_ATTR_TYPE_PTR_IN:
+ /* Ensure that any data provided by userspace beyond the known
+ * struct is zero. Userspace that knows how to use some future
+ * longer struct will fail here if used with an old kernel and
+ * non-zero content, making ABI compat/discovery simpler.
+ */
+ if (uattr->len > val_spec->u.ptr.len &&
+ val_spec->zero_trailing &&
+ !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
+ return -EOPNOTSUPP;
+
+ /* fall through */
+ case UVERBS_ATTR_TYPE_PTR_OUT:
+ if (uattr->len < val_spec->u.ptr.min_len ||
+ (!val_spec->zero_trailing &&
+ uattr->len > val_spec->u.ptr.len))
+ return -EINVAL;
+
+ if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
+ uattr->attr_data.reserved)
+ return -EINVAL;
+
+ e->ptr_attr.uattr_idx = uattr - pbundle->uattrs;
+ e->ptr_attr.len = uattr->len;
+
+ if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
+ void *p;
+
+ p = uverbs_alloc(&pbundle->bundle, uattr->len);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ e->ptr_attr.ptr = p;
+
+ if (copy_from_user(p, u64_to_user_ptr(uattr->data),
+ uattr->len))
+ return -EFAULT;
+ } else {
+ e->ptr_attr.data = uattr->data;
+ }
+ break;
+
+ case UVERBS_ATTR_TYPE_IDR:
+ case UVERBS_ATTR_TYPE_FD:
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
+ if (uattr->len != 0)
+ return -EINVAL;
+
+ o_attr = &e->obj_attr;
+ o_attr->attr_elm = attr_uapi;
+
+ /*
+ * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
+ * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64
+ * here without caring about truncation as we know that the
+ * IDR implementation today rejects negative IDs
+ */
+ o_attr->uobject = uverbs_get_uobject_from_file(
+ spec->u.obj.obj_type, spec->u.obj.access,
+ uattr->data_s64, &pbundle->bundle);
+ if (IS_ERR(o_attr->uobject))
+ return PTR_ERR(o_attr->uobject);
+ __set_bit(attr_bkey, pbundle->uobj_finalize);
+
+ if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
+ unsigned int uattr_idx = uattr - pbundle->uattrs;
+ s64 id = o_attr->uobject->id;
+
+ /* Copy the allocated id to the user-space */
+ if (put_user(id, &pbundle->user_attrs[uattr_idx].data))
+ return -EFAULT;
+ }
+
+ break;
+
+ case UVERBS_ATTR_TYPE_IDRS_ARRAY:
+ return uverbs_process_idrs_array(pbundle, attr_uapi,
+ &e->objs_arr_attr, uattr,
+ attr_bkey);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void *uapi_get_attr_for_method(struct bundle_priv *pbundle,
+ u32 attr_key)
+{
+ return radix_tree_lookup(pbundle->radix,
+ pbundle->method_key | attr_key);
+}
+
+static int uverbs_set_attr(struct bundle_priv *pbundle,
+ struct ib_uverbs_attr *uattr)
+{
+ u32 attr_key = uapi_key_attr(uattr->attr_id);
+ u32 attr_bkey = uapi_bkey_attr(attr_key);
+ const struct uverbs_api_attr *attr;
+ void *slot;
+ int ret;
+
+ slot = uapi_get_attr_for_method(pbundle, attr_key);
+ if (!slot) {
+ /*
+ * Kernel does not support the attribute but user-space says it
+ * is mandatory
+ */
+ if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
+ return -EPROTONOSUPPORT;
+ return 0;
+ }
+ attr = slot;
+
+ /* Reject duplicate attributes from user-space */
+ if (test_bit(attr_bkey, pbundle->bundle.attr_present))
+ return -EINVAL;
+
+ ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey);
+ if (ret)
+ return ret;
+
+ __set_bit(attr_bkey, pbundle->bundle.attr_present);
+
+ return 0;
+}
+
+static int ib_uverbs_run_method(struct bundle_priv *pbundle,
+ unsigned int num_attrs)
+{
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+ size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
+ unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
+ unsigned int i;
+ int ret;
+
+ /* See uverbs_disassociate_api() */
+ handler = srcu_dereference(
+ pbundle->method_elm->handler,
+ &pbundle->bundle.ufile->device->disassociate_srcu);
+ if (!handler)
+ return -EIO;
+
+ pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size);
+ if (IS_ERR(pbundle->uattrs))
+ return PTR_ERR(pbundle->uattrs);
+ if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
+ return -EFAULT;
+
+ for (i = 0; i != num_attrs; i++) {
+ ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ /* User space did not provide all the mandatory attributes */
+ if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory,
+ pbundle->bundle.attr_present,
+ pbundle->method_elm->key_bitmap_len)))
+ return -EINVAL;
+
+ if (pbundle->method_elm->has_udata)
+ uverbs_fill_udata(&pbundle->bundle,
+ &pbundle->bundle.driver_udata,
+ UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT);
+ else
+ pbundle->bundle.driver_udata = (struct ib_udata){};
+
+ if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
+ struct uverbs_obj_attr *destroy_attr =
+ &pbundle->bundle.attrs[destroy_bkey].obj_attr;
+
+ ret = uobj_destroy(destroy_attr->uobject, &pbundle->bundle);
+ if (ret)
+ return ret;
+ __clear_bit(destroy_bkey, pbundle->uobj_finalize);
+
+ ret = handler(&pbundle->bundle);
+ uobj_put_destroy(destroy_attr->uobject);
+ } else {
+ ret = handler(&pbundle->bundle);
+ }
+
+ /*
+ * Until the drivers are revised to use the bundle directly we have to
+ * assume that the driver wrote to its UHW_OUT and flag userspace
+ * appropriately.
+ */
+ if (!ret && pbundle->method_elm->has_udata) {
+ const struct uverbs_attr *attr =
+ uverbs_attr_get(&pbundle->bundle, UVERBS_ATTR_UHW_OUT);
+
+ if (!IS_ERR(attr))
+ ret = uverbs_set_output(&pbundle->bundle, attr);
+ }
+
+ /*
+ * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
+ * not invoke the method because the request is not supported. No
+ * other cases should return this code.
+ */
+ if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT))
+ return -EINVAL;
+
+ return ret;
+}
+
+static void bundle_destroy(struct bundle_priv *pbundle, bool commit)
+{
+ unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
+ struct bundle_alloc_head *memblock;
+ unsigned int i;
+
+ /* fast path for simple uobjects */
+ i = -1;
+ while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
+ i + 1)) < key_bitmap_len) {
+ struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+
+ uverbs_finalize_object(
+ attr->obj_attr.uobject,
+ attr->obj_attr.attr_elm->spec.u.obj.access, commit,
+ &pbundle->bundle);
+ }
+
+ i = -1;
+ while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
+ i + 1)) < key_bitmap_len) {
+ struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+ const struct uverbs_api_attr *attr_uapi;
+ void *slot;
+
+ slot = uapi_get_attr_for_method(
+ pbundle,
+ pbundle->method_key | uapi_bkey_to_key_attr(i));
+ if (WARN_ON(!slot))
+ continue;
+
+ attr_uapi = slot;
+
+ if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+ uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr,
+ commit, &pbundle->bundle);
+ }
+ }
+
+ for (memblock = pbundle->allocated_mem; memblock;) {
+ struct bundle_alloc_head *tmp = memblock;
+
+ memblock = memblock->next;
+ kvfree(tmp);
+ }
+}
+
+static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
+ struct ib_uverbs_ioctl_hdr *hdr,
+ struct ib_uverbs_attr __user *user_attrs)
+{
+ const struct uverbs_api_ioctl_method *method_elm;
+ struct uverbs_api *uapi = ufile->device->uapi;
+ struct bundle_priv *pbundle;
+ struct bundle_priv onstack;
+ void *slot;
+ int ret;
+
+ if (unlikely(hdr->driver_id != uapi->driver_id))
+ return -EINVAL;
+
+ slot = radix_tree_lookup(
+ &uapi->radix,
+ uapi_key_obj(hdr->object_id) |
+ uapi_key_ioctl_method(hdr->method_id));
+ if (unlikely(!slot))
+ return -EPROTONOSUPPORT;
+ method_elm = slot;
+
+ if (!method_elm->use_stack) {
+ pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
+ if (!pbundle)
+ return -ENOMEM;
+ pbundle->internal_avail =
+ method_elm->bundle_size -
+ offsetof(struct bundle_priv, internal_buffer);
+ pbundle->alloc_head.next = NULL;
+ pbundle->allocated_mem = &pbundle->alloc_head;
+ } else {
+ pbundle = &onstack;
+ pbundle->internal_avail = sizeof(pbundle->internal_buffer);
+ pbundle->allocated_mem = NULL;
+ }
+
+ /* Space for the pbundle->bundle.attrs flex array */
+ pbundle->method_elm = method_elm;
+ pbundle->method_key =
+ uapi_key_obj(hdr->object_id) |
+ uapi_key_ioctl_method(hdr->method_id);
+ pbundle->bundle.ufile = ufile;
+ pbundle->bundle.context = NULL; /* only valid if bundle has uobject */
+ pbundle->radix = &uapi->radix;
+ pbundle->user_attrs = user_attrs;
+
+ pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
+ sizeof(*pbundle->bundle.attrs),
+ sizeof(*pbundle->internal_buffer));
+ memset(pbundle->bundle.attr_present, 0,
+ sizeof(pbundle->bundle.attr_present));
+ memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
+ memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
+
+ ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
+ bundle_destroy(pbundle, ret == 0);
+ return ret;
+}
+
+long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_ioctl_hdr __user *user_hdr =
+ (struct ib_uverbs_ioctl_hdr __user *)arg;
+ struct ib_uverbs_ioctl_hdr hdr;
+ int srcu_key;
+ int err;
+
+ if (unlikely(cmd != RDMA_VERBS_IOCTL))
+ return -ENOIOCTLCMD;
+
+ err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
+ if (err)
+ return -EFAULT;
+
+ if (hdr.length > PAGE_SIZE ||
+ hdr.length != struct_size(&hdr, attrs, hdr.num_attrs))
+ return -EINVAL;
+
+ if (hdr.reserved1 || hdr.reserved2)
+ return -EPROTONOSUPPORT;
+
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs);
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+ return err;
+}
+
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ const struct uverbs_attr *attr;
+ u64 flags;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ /* Missing attribute means 0 flags */
+ if (IS_ERR(attr)) {
+ *to = 0;
+ return 0;
+ }
+
+ /*
+ * New userspace code should use 8 bytes to pass flags, but we
+ * transparently support old userspaces that were using 4 bytes as
+ * well.
+ */
+ if (attr->ptr_attr.len == 8)
+ flags = attr->ptr_attr.data;
+ else if (attr->ptr_attr.len == 4)
+ flags = *(const u32 *)&attr->ptr_attr.data;
+ else
+ return -EINVAL;
+
+ if (flags & ~allowed_bits)
+ return -EINVAL;
+
+ *to = flags;
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags64);
+
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ u64 flags;
+ int ret;
+
+ ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits);
+ if (ret)
+ return ret;
+
+ if (flags > U32_MAX)
+ return -EINVAL;
+ *to = flags;
+
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags32);
+
+/*
+ * Fill a ib_udata struct (core or uhw) using the given attribute IDs.
+ * This is primarily used to convert the UVERBS_ATTR_UHW() into the
+ * ib_udata format used by the drivers.
+ */
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+ const struct uverbs_attr *in =
+ uverbs_attr_get(&pbundle->bundle, attr_in);
+ const struct uverbs_attr *out =
+ uverbs_attr_get(&pbundle->bundle, attr_out);
+
+ if (!IS_ERR(in)) {
+ udata->inlen = in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(in))
+ udata->inbuf = (void *)
+ &pbundle->user_attrs[in->ptr_attr.uattr_idx]
+ .data;
+ else
+ udata->inbuf = u64_to_user_ptr(in->ptr_attr.data);
+ } else {
+ udata->inbuf = NULL;
+ udata->inlen = 0;
+ }
+
+ if (!IS_ERR(out)) {
+ udata->outbuf = u64_to_user_ptr(out->ptr_attr.data);
+ udata->outlen = out->ptr_attr.len;
+ } else {
+ udata->outbuf = NULL;
+ udata->outlen = 0;
+ }
+}
+
+int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
+ const void *from, size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+ size_t min_size;
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ min_size = min_t(size_t, attr->ptr_attr.len, size);
+ if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+ return -EFAULT;
+
+ return uverbs_set_output(bundle, attr);
+}
+EXPORT_SYMBOL(uverbs_copy_to);
+
+
+/*
+ * This is only used if the caller has directly used copy_to_use to write the
+ * data. It signals to user space that the buffer is filled in.
+ */
+int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ return uverbs_set_output(bundle, attr);
+}
+
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ const struct uverbs_attr *attr;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr)) {
+ if ((PTR_ERR(attr) != -ENOENT) || !def_val)
+ return PTR_ERR(attr);
+
+ *to = *def_val;
+ } else {
+ *to = attr->ptr_attr.data;
+ }
+
+ if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(_uverbs_get_const);
+
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ if (size < attr->ptr_attr.len) {
+ if (clear_user(u64_to_user_ptr(attr->ptr_attr.data + size),
+ attr->ptr_attr.len - size))
+ return -EFAULT;
+ }
+ return uverbs_copy_to(bundle, idx, from, size);
+}
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c
--- a/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c
@@ -49,12 +49,16 @@
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/pci.h>
+#include <linux/lockdep.h>
#include <asm/uaccess.h>
#include <rdma/ib.h>
+#include <rdma/uverbs_std_types.h>
#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -63,86 +67,17 @@
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
- IB_UVERBS_MAX_DEVICES = 32
+ IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UVERBS_NUM_FIXED_MINOR = 32,
+ IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
-DEFINE_SPINLOCK(ib_uverbs_idr_lock);
-DEFINE_IDR(ib_uverbs_pd_idr);
-DEFINE_IDR(ib_uverbs_mr_idr);
-DEFINE_IDR(ib_uverbs_mw_idr);
-DEFINE_IDR(ib_uverbs_ah_idr);
-DEFINE_IDR(ib_uverbs_cq_idr);
-DEFINE_IDR(ib_uverbs_qp_idr);
-DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrcd_idr);
-DEFINE_IDR(ib_uverbs_rule_idr);
-DEFINE_IDR(ib_uverbs_wq_idr);
-DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
-
-static DEFINE_SPINLOCK(map_lock);
-static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
-
-static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
- [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
- [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
- [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
- [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
- [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
- [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
- [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
-};
-
-static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw) = {
- [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
- [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
- [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
- [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
- [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
- [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
- [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
- [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
- [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
-};
-
+static DEFINE_IDA(uverbs_ida);
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
@@ -181,67 +116,56 @@
return ret;
}
-static void ib_uverbs_release_dev(struct kobject *kobj)
+static void ib_uverbs_release_dev(struct device *device)
{
struct ib_uverbs_device *dev =
- container_of(kobj, struct ib_uverbs_device, kobj);
+ container_of(device, struct ib_uverbs_device, dev);
+ uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu);
+ mutex_destroy(&dev->lists_mutex);
+ mutex_destroy(&dev->xrcd_tree_mutex);
kfree(dev);
}
-static struct kobj_type ib_uverbs_dev_ktype = {
- .release = ib_uverbs_release_dev,
-};
-
-static void ib_uverbs_release_event_file(struct kref *ref)
-{
- struct ib_uverbs_event_file *file =
- container_of(ref, struct ib_uverbs_event_file, ref);
-
- kfree(file);
-}
-
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
- struct ib_ucq_object *uobj)
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
+ struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
- spin_lock_irq(&ev_file->lock);
+ spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&ev_file->lock);
+ spin_unlock_irq(&ev_file->ev_queue.lock);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ uverbs_uobject_put(&ev_file->uobj);
}
- spin_lock_irq(&file->async_file->lock);
- list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file->lock);
+ ib_uverbs_release_uevent(&uobj->uevent);
}
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj)
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
+ struct ib_uverbs_async_event_file *async_file =
+ READ_ONCE(uobj->uobject.ufile->async_file);
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->lock);
+ if (!async_file)
+ return;
+
+ spin_lock_irq(&async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&async_file->ev_queue.lock);
}
-static void ib_uverbs_detach_umcast(struct ib_qp *qp,
- struct ib_uqp_object *uobj)
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@@ -252,140 +176,20 @@
}
}
-static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj, *tmp;
-
- context->closing = 1;
-
- list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
- struct ib_ah *ah = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
- ib_destroy_ah(ah);
- kfree(uobj);
- }
-
- /* Remove MWs before QPs, in order to support type 2A MWs. */
- list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
- struct ib_mw *mw = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- uverbs_dealloc_mw(mw);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
- struct ib_flow *flow_id = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
- ib_destroy_flow(flow_id);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
- struct ib_qp *qp = uobj->object;
- struct ib_uqp_object *uqp =
- container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp == qp->real_qp)
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- ib_uverbs_release_uevent(file, &uqp->uevent);
- kfree(uqp);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
- struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
- struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
- kfree(ind_tbl);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
- struct ib_wq *wq = uobj->object;
- struct ib_uwq_object *uwq =
- container_of(uobj, struct ib_uwq_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
- ib_destroy_wq(wq);
- ib_uverbs_release_uevent(file, &uwq->uevent);
- kfree(uwq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
- struct ib_cq *cq = uobj->object;
- struct ib_uverbs_event_file *ev_file = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobj, struct ib_ucq_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
- ib_uverbs_release_ucq(file, ev_file, ucq);
- kfree(ucq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
- struct ib_mr *mr = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
- kfree(uobj);
- }
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
- struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrcd_object *uxrcd =
- container_of(uobj, struct ib_uxrcd_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device, xrcd);
- kfree(uxrcd);
- }
- mutex_unlock(&file->device->xrcd_tree_mutex);
-
- list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
- struct ib_pd *pd = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
- ib_dealloc_pd(pd);
- kfree(uobj);
- }
-
- put_pid(context->tgid);
-
- return context->device->dealloc_ucontext(context);
-}
-
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
struct ib_device *ib_dev;
int srcu_key;
+ release_ufile_idr_uobject(file);
+
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
@@ -396,61 +200,59 @@
if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
+ if (file->async_file)
+ uverbs_uobject_put(&file->async_file->uobj);
+ put_device(&file->device->dev);
+
+ mutex_destroy(&file->umap_lock);
+ mutex_destroy(&file->ucontext_lock);
kfree(file);
}
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp, char __user *buf,
+ size_t count, loff_t *pos,
+ size_t eventsz)
{
- struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
int ret = 0;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
- while (list_empty(&file->event_list)) {
- spin_unlock_irq(&file->lock);
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
- if (wait_event_interruptible(file->poll_wait,
- (!list_empty(&file->event_list) ||
- /* The barriers built into wait_event_interruptible()
- * and wake_up() guarentee this will see the null set
- * without using RCU
- */
- !file->uverbs_file->device->ib_dev)))
+ if (wait_event_interruptible(ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
+ ev_queue->is_closed)))
return -ERESTARTSYS;
+ spin_lock_irq(&ev_queue->lock);
+
/* If device was disassociated and no event exists set an error */
- if (list_empty(&file->event_list) &&
- !file->uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
return -EIO;
-
- spin_lock_irq(&file->lock);
+ }
}
- event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
-
- if (file->is_async)
- eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
- eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
- list_del(file->event_list.next);
+ list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@@ -464,483 +266,552 @@
return ret;
}
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
+ sizeof(struct ib_uverbs_async_event_desc));
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
+ pos,
+ sizeof(struct ib_uverbs_comp_event_desc));
+}
+
+static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp,
struct poll_table_struct *wait)
{
- unsigned int pollflags = 0;
- struct ib_uverbs_event_file *file = filp->private_data;
+ __poll_t pollflags = 0;
- poll_wait(filp, &file->poll_wait, wait);
+ poll_wait(filp, &ev_queue->poll_wait, wait);
- spin_lock_irq(&file->lock);
- if (!list_empty(&file->event_list))
+ spin_lock_irq(&ev_queue->lock);
+ if (!list_empty(&ev_queue->event_list))
pollflags = POLLIN | POLLRDNORM;
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static __poll_t ib_uverbs_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_async_event_file *file = filp->private_data;
- return fasync_helper(fd, filp, on, &file->async_queue);
+ return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
}
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
- struct ib_uverbs_event *entry, *tmp;
- int closed_already = 0;
-
- mutex_lock(&file->uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->lock);
- closed_already = file->is_closed;
- file->is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
- if (entry->counter)
- list_del(&entry->obj_list);
- kfree(entry);
- }
- spin_unlock_irq(&file->lock);
- if (!closed_already) {
- list_del(&file->list);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->
- event_handler);
- }
- mutex_unlock(&file->uverbs_file->device->lists_mutex);
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
+}
- return 0;
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
}
-static const struct file_operations uverbs_event_fops = {
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
+}
+
+const struct file_operations uverbs_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_comp_event_read,
+ .poll = ib_uverbs_comp_event_poll,
+ .release = uverbs_uobject_fd_release,
+ .fasync = ib_uverbs_comp_event_fasync,
+ .llseek = no_llseek,
+};
+
+const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
- .poll = ib_uverbs_event_poll,
- .release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync,
+ .read = ib_uverbs_async_event_read,
+ .poll = ib_uverbs_async_event_poll,
+ .release = uverbs_uobject_fd_release,
+ .fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_event_file *file = cq_context;
+ struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
- if (!file)
+ if (!ev_queue)
return;
- spin_lock_irqsave(&file->lock, flags);
- if (file->is_closed) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ if (ev_queue->is_closed) {
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
- uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ uobj = cq->uobject;
- entry->desc.comp.cq_handle = cq->uobject->user_handle;
+ entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
entry->counter = &uobj->comp_events_reported;
- list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
- wake_up_interruptible(&file->poll_wait);
- kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&ev_queue->poll_wait);
+ kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
-static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
- __u64 element, __u64 event,
- struct list_head *obj_list,
- u32 *counter)
+static void
+ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event, struct list_head *obj_list,
+ u32 *counter)
{
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->lock, flags);
- if (file->async_file->is_closed) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ if (!async_file)
+ return;
+
+ spin_lock_irqsave(&async_file->ev_queue.lock, flags);
+ if (async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
- entry->desc.async.element = element;
+ entry->desc.async.element = element;
entry->desc.async.event_type = event;
- entry->desc.async.reserved = 0;
- entry->counter = counter;
+ entry->desc.async.reserved = 0;
+ entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->event_list);
+ list_add_tail(&entry->list, &async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->poll_wait);
- kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&async_file->ev_queue.poll_wait);
+ kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
-void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+static void uverbs_uobj_event(struct ib_uevent_object *eobj,
+ struct ib_event *event)
{
- struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
- struct ib_ucq_object, uobject);
+ ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file),
+ eobj->uobject.user_handle, event->event,
+ &eobj->event_list, &eobj->events_reported);
+}
- ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
- event->event, &uobj->async_list,
- &uobj->async_events_reported);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
/* for XRC target qp's, check that qp is live */
- if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ if (!event->element.qp->uobject)
return;
- uobj = container_of(event->element.qp->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
}
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
- uobj = container_of(event->element.srq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
}
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event)
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
{
- struct ib_uverbs_file *file =
- container_of(handler, struct ib_uverbs_file, event_handler);
-
- ib_uverbs_async_handler(file, event->element.port_num, event->event,
- NULL, NULL);
+ ib_uverbs_async_handler(
+ container_of(handler, struct ib_uverbs_async_event_file,
+ event_handler),
+ event->element.port_num, event->event, NULL, NULL);
}
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
- file->async_file = NULL;
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ ev_queue->is_closed = 0;
+ ev_queue->async_queue = NULL;
}
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async)
+void ib_uverbs_init_async_event_file(
+ struct ib_uverbs_async_event_file *async_file)
{
- struct ib_uverbs_event_file *ev_file;
- struct file *filp;
- int ret;
-
- ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
- if (!ev_file)
- return ERR_PTR(-ENOMEM);
+ struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
+ struct ib_device *ib_dev = async_file->uobj.context->device;
- kref_init(&ev_file->ref);
- spin_lock_init(&ev_file->lock);
- INIT_LIST_HEAD(&ev_file->event_list);
- init_waitqueue_head(&ev_file->poll_wait);
- ev_file->uverbs_file = uverbs_file;
- kref_get(&ev_file->uverbs_file->ref);
- ev_file->async_queue = NULL;
- ev_file->is_closed = 0;
+ ib_uverbs_init_event_queue(&async_file->ev_queue);
- /*
- * fops_get() can't fail here, because we're coming from a
- * system call on a uverbs file, which will already have a
- * module reference.
- */
- filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops));
- if (IS_ERR(filp))
- goto err_put_refs;
- filp->private_data = ev_file;
-
- mutex_lock(&uverbs_file->device->lists_mutex);
- list_add_tail(&ev_file->list,
- &uverbs_file->device->uverbs_events_file_list);
- mutex_unlock(&uverbs_file->device->lists_mutex);
-
- if (is_async) {
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
-
- /* At that point async file stuff was fully set */
- ev_file->is_async = 1;
+ /* The first async_event_file becomes the default one for the file. */
+ mutex_lock(&uverbs_file->ucontext_lock);
+ if (!uverbs_file->async_file) {
+ /* Pairs with the put in ib_uverbs_release_file */
+ uverbs_uobject_get(&async_file->uobj);
+ atomic_store_rel_ptr((uintptr_t *)&uverbs_file->async_file, (uintptr_t)async_file);
}
+ mutex_unlock(&uverbs_file->ucontext_lock);
- return filp;
-
-err_put_file:
- fput(filp);
- kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
- uverbs_file->async_file = NULL;
- return ERR_PTR(ret);
-
-err_put_refs:
- kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
- return filp;
+ INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
+ ib_uverbs_event_handler);
+ ib_register_event_handler(&async_file->event_handler);
}
-/*
- * Look up a completion event file by FD. If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
+static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
+ const struct uverbs_api_write_method *method_elm)
{
- struct ib_uverbs_event_file *ev_file = NULL;
- struct fd f = fdget(fd);
+ if (method_elm->is_ex) {
+ count -= sizeof(*hdr) + sizeof(*ex_hdr);
- if (!f.file)
- return NULL;
+ if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
+ return -EINVAL;
- if (f.file->f_op != &uverbs_event_fops)
- goto out;
+ if (hdr->in_words * 8 < method_elm->req_size)
+ return -ENOSPC;
- ev_file = f.file->private_data;
- if (ev_file->is_async) {
- ev_file = NULL;
- goto out;
- }
+ if (ex_hdr->cmd_hdr_reserved)
+ return -EINVAL;
- kref_get(&ev_file->ref);
+ if (ex_hdr->response) {
+ if (!hdr->out_words && !ex_hdr->provider_out_words)
+ return -EINVAL;
-out:
- fdput(f);
- return ev_file;
-}
+ if (hdr->out_words * 8 < method_elm->resp_size)
+ return -ENOSPC;
-static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
-{
- u64 mask;
-
- if (command <= IB_USER_VERBS_CMD_OPEN_QP)
- mask = ib_dev->uverbs_cmd_mask;
- else
- mask = ib_dev->uverbs_ex_cmd_mask;
+ if (!access_ok(u64_to_user_ptr(ex_hdr->response),
+ (hdr->out_words + ex_hdr->provider_out_words) * 8))
+ return -EFAULT;
+ } else {
+ if (hdr->out_words || ex_hdr->provider_out_words)
+ return -EINVAL;
+ }
- if (mask & ((u64)1 << command))
return 0;
+ }
+
+ /* not extended command */
+ if (hdr->in_words * 4 != count)
+ return -EINVAL;
- return -1;
+ if (count < method_elm->req_size + sizeof(hdr)) {
+ /*
+ * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
+ * with a 16 byte write instead of 24. Old kernels didn't
+ * check the size so they allowed this. Now that the size is
+ * checked provide a compatibility work around to not break
+ * those userspaces.
+ */
+ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
+ count == 16) {
+ hdr->in_words = 6;
+ return 0;
+ }
+ return -ENOSPC;
+ }
+ if (hdr->out_words * 4 < method_elm->resp_size)
+ return -ENOSPC;
+
+ return 0;
}
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev;
+ const struct uverbs_api_write_method *method_elm;
+ struct uverbs_api *uapi = file->device->uapi;
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_uverbs_cmd_hdr hdr;
- __u32 command;
- __u32 flags;
+ struct uverbs_attr_bundle bundle;
int srcu_key;
ssize_t ret;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_warn_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ current->pid, current->comm);
return -EACCES;
+ }
- if (count < sizeof hdr)
+ if (count < sizeof(hdr))
return -EINVAL;
- if (copy_from_user(&hdr, buf, sizeof hdr))
+ if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
- goto out;
- }
+ method_elm = uapi_get_method(uapi, hdr.command);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
- if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK)) {
- ret = -EINVAL;
- goto out;
+ if (method_elm->is_ex) {
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
}
- command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
- if (verify_command_mask(ib_dev, command)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
+ ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
+ if (ret)
+ return ret;
- if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT) {
- ret = -EINVAL;
- goto out;
- }
-
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
- if (!flags) {
- if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[command]) {
- ret = -EINVAL;
- goto out;
- }
-
- if (hdr.in_words * 4 != count) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = uverbs_cmd_table[command](file, ib_dev,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
-
- } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
- struct ib_uverbs_ex_cmd_hdr ex_hdr;
- struct ib_udata ucore;
- struct ib_udata uhw;
- size_t written_count = count;
-
- if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
- !uverbs_ex_cmd_table[command]) {
- ret = -ENOSYS;
- goto out;
- }
-
- if (!file->ucontext) {
- ret = -EINVAL;
- goto out;
- }
-
- if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
- ret = -EINVAL;
- goto out;
- }
-
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
- ret = -EFAULT;
- goto out;
- }
-
- count -= sizeof(hdr) + sizeof(ex_hdr);
- buf += sizeof(hdr) + sizeof(ex_hdr);
-
- if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
- ret = -EINVAL;
- goto out;
- }
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- if (ex_hdr.cmd_hdr_reserved) {
- ret = -EINVAL;
- goto out;
+ buf += sizeof(hdr);
+
+ memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
+ bundle.ufile = file;
+ bundle.context = NULL; /* only valid if bundle has uobject */
+ if (!method_elm->is_ex) {
+ size_t in_len = hdr.in_words * 4 - sizeof(hdr);
+ size_t out_len = hdr.out_words * 4;
+ u64 response = 0;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.inlen =
+ in_len - method_elm->req_size;
+ in_len = method_elm->req_size;
+ if (bundle.driver_udata.inlen)
+ bundle.driver_udata.inbuf = buf + in_len;
+ else
+ bundle.driver_udata.inbuf = NULL;
+ } else {
+ memset(&bundle.driver_udata, 0,
+ sizeof(bundle.driver_udata));
}
- if (ex_hdr.response) {
- if (!hdr.out_words && !ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!access_ok((void __user *) (unsigned long) ex_hdr.response,
- (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
- ret = -EFAULT;
- goto out;
+ if (method_elm->has_resp) {
+ /*
+ * The macros check that if has_resp is set
+ * then the command request structure starts
+ * with a '__aligned u64 response' member.
+ */
+ ret = get_user(response, (const u64 __user *)buf);
+ if (ret)
+ goto out_unlock;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.outlen =
+ out_len - method_elm->resp_size;
+ out_len = method_elm->resp_size;
+ if (bundle.driver_udata.outlen)
+ bundle.driver_udata.outbuf =
+ u64_to_user_ptr(response +
+ out_len);
+ else
+ bundle.driver_udata.outbuf = NULL;
}
} else {
- if (hdr.out_words || ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
+ bundle.driver_udata.outlen = 0;
+ bundle.driver_udata.outbuf = NULL;
}
- ib_uverbs_init_udata_buf_or_null(&ucore, buf,
- u64_to_user_ptr(ex_hdr.response),
- hdr.in_words * 8, hdr.out_words * 8);
-
- ib_uverbs_init_udata_buf_or_null(&uhw,
- buf + ucore.inlen,
- u64_to_user_ptr(ex_hdr.response + ucore.outlen),
- ex_hdr.provider_in_words * 8,
- ex_hdr.provider_out_words * 8);
-
- ret = uverbs_ex_cmd_table[command](file,
- ib_dev,
- &ucore,
- &uhw);
- if (!ret)
- ret = written_count;
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.ucore, buf, u64_to_user_ptr(response),
+ in_len, out_len);
} else {
- ret = -ENOSYS;
+ buf += sizeof(ex_hdr);
+
+ ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
+ u64_to_user_ptr(ex_hdr.response),
+ hdr.in_words * 8, hdr.out_words * 8);
+
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.driver_udata, buf + bundle.ucore.inlen,
+ u64_to_user_ptr(ex_hdr.response + bundle.ucore.outlen),
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
+
}
-out:
+ ret = method_elm->handler(&bundle);
+out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- return ret;
+ return (ret) ? : count;
}
+static const struct vm_operations_struct rdma_umap_ops;
+
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev;
+ struct ib_ucontext *ucontext;
int ret = 0;
int srcu_key;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
+ ucontext = ib_uverbs_get_ucontext_file(file);
+ if (IS_ERR(ucontext)) {
+ ret = PTR_ERR(ucontext);
goto out;
}
-
- if (!file->ucontext)
- ret = -ENODEV;
- else
- ret = ib_dev->mmap(file->ucontext, vma);
+ vma->vm_ops = &rdma_umap_ops;
+ ret = ucontext->device->mmap(ucontext, vma);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
}
+/*
+ * The VMA has been dup'd, initialize the vm_private_data with a new tracking
+ * struct
+ */
+static void rdma_umap_open(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *opriv = vma->vm_private_data;
+ struct rdma_umap_priv *priv;
+
+ if (!opriv)
+ return;
+
+ /* We are racing with disassociation */
+ if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+ goto out_zap;
+ /*
+ * Disassociation already completed, the VMA should already be zapped.
+ */
+ if (!ufile->ucontext)
+ goto out_unlock;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ goto out_unlock;
+ rdma_umap_priv_init(priv, vma, opriv->entry);
+
+ up_read(&ufile->hw_destroy_rwsem);
+ return;
+
+out_unlock:
+ up_read(&ufile->hw_destroy_rwsem);
+out_zap:
+ /*
+ * We can't allow the VMA to be created with the actual IO pages, that
+ * would break our API contract, and it can't be stopped at this
+ * point, so zap it.
+ */
+ vma->vm_private_data = NULL;
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void rdma_umap_close(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *priv = vma->vm_private_data;
+
+ if (!priv)
+ return;
+
+ /*
+ * The vma holds a reference on the struct file that created it, which
+ * in turn means that the ib_uverbs_file is guaranteed to exist at
+ * this point.
+ */
+ mutex_lock(&ufile->umap_lock);
+
+ list_del(&priv->list);
+ mutex_unlock(&ufile->umap_lock);
+ kfree(priv);
+}
+
+static const struct vm_operations_struct rdma_umap_ops = {
+ .open = rdma_umap_open,
+ .close = rdma_umap_close,
+};
+
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
+{
+ struct rdma_umap_priv *priv, *next_priv;
+
+ lockdep_assert_held(&ufile->hw_destroy_rwsem);
+
+ while (1) {
+ struct mm_struct *mm = NULL;
+
+ /* Get an arbitrary mm pointer that hasn't been cleaned yet */
+ mutex_lock(&ufile->umap_lock);
+ while (!list_empty(&ufile->umaps)) {
+ int ret;
+
+ priv = list_first_entry(&ufile->umaps,
+ struct rdma_umap_priv, list);
+ mm = priv->vma->vm_mm;
+ ret = mmget_not_zero(mm);
+ if (!ret) {
+ list_del_init(&priv->list);
+ mm = NULL;
+ continue;
+ }
+ break;
+ }
+ mutex_unlock(&ufile->umap_lock);
+ if (!mm)
+ return;
+
+ /*
+ * The umap_lock is nested under mmap_sem since it used within
+ * the vma_ops callbacks, so we have to clean the list one mm
+ * at a time to get the lock ordering right. Typically there
+ * will only be one mm, so no big deal.
+ */
+ down_read(&mm->mmap_sem);
+ mutex_lock(&ufile->umap_lock);
+ list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
+ list) {
+ struct vm_area_struct *vma = priv->vma;
+
+ if (vma->vm_mm != mm)
+ continue;
+ list_del_init(&priv->list);
+
+ zap_vma_ptes(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
+ }
+ mutex_unlock(&ufile->umap_lock);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ }
+}
+
/*
* ib_uverbs_open() does not need the BKL:
*
@@ -964,6 +835,7 @@
if (!atomic_inc_not_zero(&dev->refcount))
return -ENXIO;
+ get_device(&dev->dev);
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
mutex_lock(&dev->lists_mutex);
ib_dev = srcu_dereference(dev->ib_dev,
@@ -995,18 +867,22 @@
}
file->device = dev;
- file->ucontext = NULL;
- file->async_file = NULL;
kref_init(&file->ref);
- mutex_init(&file->mutex);
- mutex_init(&file->cleanup_mutex);
+ mutex_init(&file->ucontext_lock);
+
+ spin_lock_init(&file->uobjects_lock);
+ INIT_LIST_HEAD(&file->uobjects);
+ init_rwsem(&file->hw_destroy_rwsem);
+ mutex_init(&file->umap_lock);
+ INIT_LIST_HEAD(&file->umaps);
filp->private_data = file;
- kobject_get(&dev->kobj);
list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
+ setup_ufile_idr_uobject(file);
+
return nonseekable_open(inode, filp);
err_module:
@@ -1018,33 +894,21 @@
if (atomic_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
+ put_device(&dev->dev);
return ret;
}
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_uverbs_device *dev = file->device;
- mutex_lock(&file->cleanup_mutex);
- if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
- file->ucontext = NULL;
- }
- mutex_unlock(&file->cleanup_mutex);
+ uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
mutex_lock(&file->device->lists_mutex);
- if (!file->is_closed) {
- list_del(&file->list);
- file->is_closed = 1;
- }
+ list_del_init(&file->list);
mutex_unlock(&file->device->lists_mutex);
- if (file->async_file)
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
-
kref_put(&file->ref, ib_uverbs_release_file);
- kobject_put(&dev->kobj);
return 0;
}
@@ -1055,6 +919,8 @@
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
+ .unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = NULL,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -1064,6 +930,8 @@
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
+ .unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = NULL,
};
static struct ib_client uverbs_client = {
@@ -1072,17 +940,15 @@
.remove = ib_uverbs_remove_one
};
-static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
+static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
struct ib_device *ib_dev;
- if (!dev)
- return -ENODEV;
-
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
@@ -1091,18 +957,17 @@
return ret;
}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR(ibdev, S_IRUGO, ibdev_show, NULL);
-static ssize_t show_dev_abi_version(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t abi_version_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
struct ib_device *ib_dev;
- if (!dev)
- return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
@@ -1111,45 +976,31 @@
return ret;
}
-static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
+static DEVICE_ATTR(abi_version, S_IRUGO, abi_version_show, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
-
-/*
- * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
- * requesting a new major number and doubling the number of max devices we
- * support. It's stupid, but simple.
- */
-static int find_overflow_devnum(void)
+static int ib_uverbs_create_uapi(struct ib_device *device,
+ struct ib_uverbs_device *uverbs_dev)
{
- int ret;
+ struct uverbs_api *uapi;
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
- "infiniband_verbs");
- if (ret) {
- pr_err("user_verbs: couldn't register dynamic device number\n");
- return ret;
- }
- }
+ uapi = uverbs_alloc_api(device);
+ if (IS_ERR(uapi))
+ return PTR_ERR(uapi);
- ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
- if (ret >= IB_UVERBS_MAX_DEVICES)
- return -1;
-
- return ret;
+ uverbs_dev->uapi = uapi;
+ return 0;
}
static ssize_t
show_dev_device(struct device *device, struct device_attribute *attr, char *buf)
{
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
- if (!dev || !dev->ib_dev->dma_device)
+ if (!dev->ib_dev->dma_device)
return -ENODEV;
return sprintf(buf, "0x%04x\n",
@@ -1160,9 +1011,10 @@
static ssize_t
show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf)
{
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
- if (!dev || !dev->ib_dev->dma_device)
+ if (!dev->ib_dev->dma_device)
return -ENODEV;
return sprintf(buf, "0x%04x\n",
@@ -1170,16 +1022,15 @@
}
static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL);
-struct attribute *device_attrs[] =
-{
+static struct attribute *ib_dev_attrs[] = {
&dev_attr_device.attr,
&dev_attr_vendor.attr,
- NULL
+ NULL,
};
-static struct attribute_group device_group = {
- .name = "device",
- .attrs = device_attrs
+static const struct attribute_group dev_attr_group = {
+ .name = "device",
+ .attrs = ib_dev_attrs,
};
static void ib_uverbs_add_one(struct ib_device *device)
@@ -1192,7 +1043,7 @@
if (!device->alloc_ucontext)
return;
- uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
return;
@@ -1202,77 +1053,63 @@
return;
}
+ uverbs_dev->dev.class = uverbs_class;
+ uverbs_dev->dev.parent = device->dev.parent;
+ uverbs_dev->dev.release = ib_uverbs_release_dev;
+ device_initialize(&uverbs_dev->dev);
atomic_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
- kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
- INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
-
- spin_lock(&map_lock);
- devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (devnum >= IB_UVERBS_MAX_DEVICES) {
- spin_unlock(&map_lock);
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- spin_lock(&map_lock);
- uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- uverbs_dev->devnum = devnum;
- base = devnum + IB_UVERBS_BASE_DEV;
- set_bit(devnum, dev_map);
- }
- spin_unlock(&map_lock);
-
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
- cdev_init(&uverbs_dev->cdev, NULL);
+ devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
+ GFP_KERNEL);
+ if (devnum < 0)
+ goto err;
+ uverbs_dev->devnum = devnum;
+ if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
+ base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
+ else
+ base = IB_UVERBS_BASE_DEV + devnum;
+
+ if (ib_uverbs_create_uapi(device, uverbs_dev))
+ goto err_uapi;
+
+ uverbs_dev->dev.devt = base;
+ dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
+
+ cdev_init(&uverbs_dev->cdev,
+ device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
uverbs_dev->cdev.owner = THIS_MODULE;
- uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
+
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(&uverbs_dev->cdev, base, 1))
- goto err_cdev;
+ ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
+ if (ret)
+ goto err_uapi;
- uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
- uverbs_dev->cdev.dev, uverbs_dev,
- "uverbs%d", uverbs_dev->devnum);
- if (IS_ERR(uverbs_dev->dev))
+ if (device_create_file(&uverbs_dev->dev, &dev_attr_ibdev))
+ goto err_cdev;
+ if (device_create_file(&uverbs_dev->dev, &dev_attr_abi_version))
+ goto err_cdev;
+ if (sysfs_create_group(&uverbs_dev->dev.kobj, &dev_attr_group))
goto err_cdev;
-
- if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
- goto err_class;
- if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
- goto err_class;
- if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group))
- goto err_class;
ib_set_client_data(device, &uverbs_client, uverbs_dev);
-
return;
-err_class:
- device_destroy(uverbs_class, uverbs_dev->cdev.dev);
-
err_cdev:
- cdev_del(&uverbs_dev->cdev);
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
-
+ cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
+err_uapi:
+ ida_free(&uverbs_ida, devnum);
err:
if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
- kobject_put(&uverbs_dev->kobj);
+ put_device(&uverbs_dev->dev);
return;
}
@@ -1280,70 +1117,35 @@
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_event_file *event_file;
- struct ib_event event;
/* Pending running commands to terminate */
- synchronize_srcu(&uverbs_dev->disassociate_srcu);
- event.event = IB_EVENT_DEVICE_FATAL;
- event.element.port_num = 0;
- event.device = ib_dev;
+ uverbs_disassociate_api_pre(uverbs_dev);
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
- struct ib_ucontext *ucontext;
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
- file->is_closed = 1;
- list_del(&file->list);
+ list_del_init(&file->list);
kref_get(&file->ref);
- mutex_unlock(&uverbs_dev->lists_mutex);
-
- mutex_lock(&file->cleanup_mutex);
- ucontext = file->ucontext;
- file->ucontext = NULL;
- mutex_unlock(&file->cleanup_mutex);
-
- /* At this point ib_uverbs_close cannot be running
- * ib_uverbs_cleanup_ucontext
+ /* We must release the mutex before going ahead and calling
+ * uverbs_cleanup_ufile, as it might end up indirectly calling
+ * uverbs_close, for example due to freeing the resources (e.g
+ * mmput).
*/
- if (ucontext) {
- /* We must release the mutex before going ahead and
- * calling disassociate_ucontext. disassociate_ucontext
- * might end up indirectly calling uverbs_close,
- * for example due to freeing the resources
- * (e.g mmput).
- */
- ib_uverbs_event_handler(&file->event_handler, &event);
- ib_dev->disassociate_ucontext(ucontext);
- ib_uverbs_cleanup_ucontext(file, ucontext);
- }
+ mutex_unlock(&uverbs_dev->lists_mutex);
- mutex_lock(&uverbs_dev->lists_mutex);
- kref_put(&file->ref, ib_uverbs_release_file);
- }
+ ib_uverbs_async_handler(READ_ONCE(file->async_file), 0,
+ IB_EVENT_DEVICE_FATAL, NULL, NULL);
- while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
- event_file = list_first_entry(&uverbs_dev->
- uverbs_events_file_list,
- struct ib_uverbs_event_file,
- list);
- spin_lock_irq(&event_file->lock);
- event_file->is_closed = 1;
- spin_unlock_irq(&event_file->lock);
-
- list_del(&event_file->list);
- if (event_file->is_async) {
- ib_unregister_event_handler(&event_file->uverbs_file->
- event_handler);
- event_file->uverbs_file->event_handler.device = NULL;
- }
+ uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
+ kref_put(&file->ref, ib_uverbs_release_file);
- wake_up_interruptible(&event_file->poll_wait);
- kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ mutex_lock(&uverbs_dev->lists_mutex);
}
mutex_unlock(&uverbs_dev->lists_mutex);
+
+ uverbs_disassociate_api(uverbs_dev->uapi);
}
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
@@ -1354,15 +1156,8 @@
if (!uverbs_dev)
return;
- sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group);
- dev_set_drvdata(uverbs_dev->dev, NULL);
- device_destroy(uverbs_class, uverbs_dev->cdev.dev);
- cdev_del(&uverbs_dev->cdev);
-
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(uverbs_dev->devnum, dev_map);
- else
- clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
+ cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
+ ida_free(&uverbs_ida, uverbs_dev->devnum);
if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
@@ -1376,7 +1171,6 @@
* cdev was deleted, however active clients can still issue
* commands and close their open files.
*/
- rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
ib_uverbs_free_hw_resources(uverbs_dev, device);
wait_clients = 0;
}
@@ -1385,7 +1179,8 @@
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
- kobject_put(&uverbs_dev->kobj);
+
+ put_device(&uverbs_dev->dev);
}
static char *uverbs_devnode(struct device *dev, umode_t *mode)
@@ -1399,13 +1194,22 @@
{
int ret;
- ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR,
"infiniband_verbs");
if (ret) {
pr_err("user_verbs: couldn't register device number\n");
goto out;
}
+ ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
+ IB_UVERBS_NUM_DYNAMIC_MINOR,
+ "infiniband_verbs");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+
uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
if (IS_ERR(uverbs_class)) {
ret = PTR_ERR(uverbs_class);
@@ -1433,7 +1237,12 @@
class_destroy(uverbs_class);
out_chrdev:
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
+
+out_alloc:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
out:
return ret;
@@ -1443,16 +1252,10 @@
{
ib_unregister_client(&uverbs_client);
class_destroy(uverbs_class);
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
- idr_destroy(&ib_uverbs_pd_idr);
- idr_destroy(&ib_uverbs_mr_idr);
- idr_destroy(&ib_uverbs_mw_idr);
- idr_destroy(&ib_uverbs_ah_idr);
- idr_destroy(&ib_uverbs_cq_idr);
- idr_destroy(&ib_uverbs_qp_idr);
- idr_destroy(&ib_uverbs_srq_idr);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
}
module_init_order(ib_uverbs_init, SI_ORDER_FIFTH);
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/file.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return ib_destroy_ah_user((struct ib_ah *)uobject->object,
+ RDMA_DESTROY_AH_SLEEPABLE,
+ &attrs->driver_udata);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_flow *flow = (struct ib_flow *)uobject->object;
+ struct ib_uflow_object *uflow =
+ container_of(uobject, struct ib_uflow_object, uobject);
+ struct ib_qp *qp = flow->qp;
+ int ret;
+
+ ret = flow->device->destroy_flow(flow);
+ if (!ret) {
+ if (qp)
+ atomic_dec(&qp->usecnt);
+ ib_uverbs_flow_resources_free(uflow->resources);
+ }
+
+ return ret;
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ /*
+ * If this is a user triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+ * because the mcast attaches are not ubojects and will not be
+ * destroyed by anything else during cleanup processing.
+ */
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(&uqp->uevent);
+ return ret;
+}
+
+static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+ int ret;
+
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ kfree(ind_tbl);
+ return ret;
+}
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq(wq, &attrs->driver_udata);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ ib_uverbs_release_uevent(&uwq->uevent);
+ return ret;
+}
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uevent, struct ib_usrq_object, uevent);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uevent);
+ return ret;
+}
+
+static int uverbs_free_xrcd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_xrcd *xrcd = uobject->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobject, struct ib_uxrcd_object, uobject);
+ int ret;
+
+ ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
+ if (ret)
+ return ret;
+
+ mutex_lock(&attrs->ufile->device->xrcd_tree_mutex);
+ ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs);
+ mutex_unlock(&attrs->ufile->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+static int uverbs_free_pd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ ib_dealloc_pd_user(pd, &attrs->driver_udata);
+ return 0;
+}
+
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
+{
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&event_queue->lock);
+ /*
+ * The user must ensure that no new items are added to the event_list
+ * once is_closed is set.
+ */
+ event_queue->is_closed = 1;
+ spin_unlock_irq(&event_queue->lock);
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+
+ spin_lock_irq(&event_queue->lock);
+ list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ spin_unlock_irq(&event_queue->lock);
+}
+
+static int
+uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *file =
+ container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
+
+ ib_uverbs_free_event_queue(&file->ev_queue);
+ return 0;
+}
+
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
+{
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_destroy_def_handler);
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_completion_event_file_destroy_uobj,
+ &uverbs_event_fops,
+ "[infinibandevent]",
+ FMODE_READ));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_QP,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE,
+ UVERBS_OBJECT_MW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw),
+ &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+ uverbs_free_srq));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_AH_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE,
+ UVERBS_OBJECT_AH,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah),
+ &UVERBS_METHOD(UVERBS_METHOD_AH_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+ uverbs_free_flow),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_WQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl),
+ &UVERBS_METHOD(UVERBS_METHOD_RWQ_IND_TBL_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_XRCD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
+ uverbs_free_xrcd),
+ &UVERBS_METHOD(UVERBS_METHOD_XRCD_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_PD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd),
+ &UVERBS_METHOD(UVERBS_METHOD_PD_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_intf[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
+ {}
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+#include <sys/fcntl.h>
+
+static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC);
+
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ return 0;
+}
+
+static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_async_event_file *event_file =
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+ ib_unregister_event_handler(&event_file->event_handler);
+ ib_uverbs_free_event_queue(&event_file->ev_queue);
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ASYNC_EVENT_ALLOC,
+ UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file),
+ uverbs_async_event_destroy_uobj,
+ &uverbs_async_event_fops,
+ "[infinibandevent]",
+ FMODE_READ),
+ &UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC));
+
+const struct uapi_definition uverbs_def_obj_async_fd[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT),
+ {}
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+#include <linux/overflow.h>
+
+static int uverbs_free_counters(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_counters *counters = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ return counters->device->destroy_counters(counters);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_counters *counters;
+ int ret;
+
+ /*
+ * This check should be removed once the infrastructure
+ * have the ability to remove methods from parse tree once
+ * such condition is met.
+ */
+ if (!ib_dev->create_counters)
+ return -EOPNOTSUPP;
+
+ counters = ib_dev->create_counters(ib_dev, attrs);
+ if (IS_ERR(counters)) {
+ ret = PTR_ERR(counters);
+ goto err_create_counters;
+ }
+
+ counters->device = ib_dev;
+ counters->uobject = uobj;
+ uobj->object = counters;
+ atomic_set(&counters->usecnt, 0);
+
+ return 0;
+
+err_create_counters:
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_counters_read_attr read_attr = {};
+ const struct uverbs_attr *uattr;
+ struct ib_counters *counters =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
+ int ret;
+
+ if (!counters->device->read_counters)
+ return -EOPNOTSUPP;
+
+ if (!atomic_read(&counters->usecnt))
+ return -EINVAL;
+
+ ret = uverbs_get_flags32(&read_attr.flags, attrs,
+ UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ IB_UVERBS_READ_COUNTERS_PREFER_CACHED);
+ if (ret)
+ return ret;
+
+ uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
+ read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
+ read_attr.counters_buff = uverbs_zalloc(
+ attrs, array_size(read_attr.ncounters, sizeof(u64)));
+ if (IS_ERR(read_attr.counters_buff))
+ return PTR_ERR(read_attr.counters_buff);
+
+ ret = counters->device->read_counters(counters, &read_attr, attrs);
+ if (ret)
+ return ret;
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
+ read_attr.counters_buff,
+ read_attr.ncounters * sizeof(u64));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_COUNTERS_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_READ,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ enum ib_uverbs_read_counters_flags));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
+
+const struct uapi_definition uverbs_def_obj_counters[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)),
+ {}
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_cq_user(cq, &attrs->driver_udata);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ ib_uverbs_release_ucq(
+ ev_queue ? container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) :
+ NULL,
+ ucq);
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_device *ib_dev = attrs->context->device;
+ int ret;
+ u64 user_handle;
+ struct ib_cq_init_attr attr = {};
+ struct ib_cq *cq;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
+ struct ib_uobject *ev_file_uobj;
+
+ if (!ib_dev->create_cq || !ib_dev->destroy_cq)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.comp_vector, attrs,
+ UVERBS_ATTR_CREATE_CQ_COMP_VECTOR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.cqe, attrs,
+ UVERBS_ATTR_CREATE_CQ_CQE);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_CQ_USER_HANDLE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&attr.flags, attrs,
+ UVERBS_ATTR_CREATE_CQ_FLAGS,
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION |
+ IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN);
+ if (ret)
+ return ret;
+
+ ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_uobj)) {
+ ev_file = container_of(ev_file_uobj,
+ struct ib_uverbs_completion_event_file,
+ uobj);
+ uverbs_uobject_get(ev_file_uobj);
+ }
+
+ if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ INIT_LIST_HEAD(&obj->comp_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
+ goto err_event_file;
+ }
+
+ cq->device = ib_dev;
+ cq->uobject = obj;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
+ atomic_set(&cq->usecnt, 0);
+
+ ret = ib_dev->create_cq(cq, &attr, &attrs->driver_udata);
+ if (ret)
+ goto err_free;
+
+ obj->uevent.uobject.object = cq;
+ obj->uevent.uobject.user_handle = user_handle;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
+ if (ret)
+ goto err_cq;
+
+ return 0;
+err_cq:
+ ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
+ cq = NULL;
+err_free:
+ kfree(cq);
+err_event_file:
+ if (ev_file)
+ uverbs_uobject_put(ev_file_uobj);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS,
+ enum ib_uverbs_ex_create_cq_flags),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
+ struct ib_ucq_object *obj =
+ container_of(uobj, struct ib_ucq_object, uevent.uobject);
+ struct ib_uverbs_destroy_cq_resp resp = {
+ .comp_events_reported = obj->comp_events_reported,
+ .async_events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_CQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
+
+#if 1 /* CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI */
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+#endif
+);
+
+const struct uapi_definition uverbs_def_obj_cq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)),
+ {}
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_ioctl.h>
+
+/*
+ * This ioctl method allows calling any defined write or write_ex
+ * handler. This essentially replaces the hdr/ex_hdr system with the ioctl
+ * marshalling, and brings the non-ex path into the same marshalling as the ex
+ * path.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct uverbs_api *uapi = attrs->ufile->device->uapi;
+ const struct uverbs_api_write_method *method_elm;
+ u32 cmd;
+ int rc;
+
+ rc = uverbs_get_const(&cmd, attrs, UVERBS_ATTR_WRITE_CMD);
+ if (rc)
+ return rc;
+
+ method_elm = uapi_get_method(uapi, cmd);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ uverbs_fill_udata(attrs, &attrs->ucore, UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_CORE_OUT);
+
+ if (attrs->ucore.inlen < method_elm->req_size ||
+ attrs->ucore.outlen < method_elm->resp_size)
+ return -ENOSPC;
+
+ return method_elm->handler(attrs);
+}
+
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_WRITE_CMD,
+ enum ib_uverbs_write_cmds,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CORE_OUT,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static uint32_t *
+gather_objects_handle(struct ib_uverbs_file *ufile,
+ const struct uverbs_api_object *uapi_object,
+ struct uverbs_attr_bundle *attrs,
+ ssize_t out_len,
+ u64 *total)
+{
+ u64 max_count = out_len / sizeof(u32);
+ struct ib_uobject *obj;
+ u64 count = 0;
+ u32 *handles;
+
+ /* Allocated memory that cannot page out where we gather
+ * all object ids under a spin_lock.
+ */
+ handles = uverbs_zalloc(attrs, out_len);
+ if (IS_ERR(handles))
+ return handles;
+
+ spin_lock_irq(&ufile->uobjects_lock);
+ list_for_each_entry(obj, &ufile->uobjects, list) {
+ u32 obj_id = obj->id;
+
+ if (obj->uapi_object != uapi_object)
+ continue;
+
+ if (count >= max_count)
+ break;
+
+ handles[count] = obj_id;
+ count++;
+ }
+ spin_unlock_irq(&ufile->uobjects_lock);
+
+ *total = count;
+ return handles;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_api_object *uapi_object;
+ ssize_t out_len;
+ u64 total = 0;
+ u16 object_id;
+ u32 *handles;
+ int ret;
+
+ out_len = uverbs_attr_get_len(attrs, UVERBS_ATTR_INFO_HANDLES_LIST);
+ if (out_len <= 0 || (out_len % sizeof(u32) != 0))
+ return -EINVAL;
+
+ ret = uverbs_get_const(&object_id, attrs, UVERBS_ATTR_INFO_OBJECT_ID);
+ if (ret)
+ return ret;
+
+ uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
+ if (!uapi_object)
+ return -EINVAL;
+
+ handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
+ out_len, &total);
+ if (IS_ERR(handles))
+ return PTR_ERR(handles);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_HANDLES_LIST, handles,
+ sizeof(u32) * total);
+ if (ret)
+ goto err;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_TOTAL_HANDLES, &total,
+ sizeof(total));
+err:
+ return ret;
+}
+
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num)
+{
+ resp->state = attr->state;
+ resp->max_mtu = attr->max_mtu;
+ resp->active_mtu = attr->active_mtu;
+ resp->gid_tbl_len = attr->gid_tbl_len;
+ resp->port_cap_flags = make_port_cap_flags(attr);
+ resp->max_msg_sz = attr->max_msg_sz;
+ resp->bad_pkey_cntr = attr->bad_pkey_cntr;
+ resp->qkey_viol_cntr = attr->qkey_viol_cntr;
+ resp->pkey_tbl_len = attr->pkey_tbl_len;
+
+ if (attr->grh_required)
+ resp->flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
+ resp->lid = (u16)attr->lid;
+ resp->sm_lid = (u16)attr->sm_lid;
+ resp->lmc = attr->lmc;
+ resp->max_vl_num = attr->max_vl_num;
+ resp->sm_sl = attr->sm_sl;
+ resp->subnet_timeout = attr->subnet_timeout;
+ resp->init_type_reply = attr->init_type_reply;
+ resp->active_width = attr->active_width;
+ resp->active_speed = attr->active_speed;
+ resp->phys_state = attr->phys_state;
+ resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_device *ib_dev;
+ struct ib_port_attr attr = {};
+ struct ib_uverbs_query_port_resp_ex resp = {};
+ struct ib_ucontext *ucontext;
+ int ret;
+ u8 port_num;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->query_port)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_PORT_PORT_NUM);
+ if (ret)
+ return ret;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
+ if (ret)
+ return ret;
+
+ copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num);
+ resp.port_cap_flags2 = 0;
+
+ return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP,
+ &resp, sizeof(resp));
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u32 num_comp = attrs->ufile->device->num_comp_vectors;
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
+ ret = ib_init_ucontext(attrs);
+ if (ret) {
+ kfree(attrs->context);
+ attrs->context = NULL;
+ return ret;
+ }
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_GET_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_INFO_HANDLES,
+ /* Also includes any device specific object ids */
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID,
+ enum uverbs_default_objects, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_TOTAL_HANDLES,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_HANDLES_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_PORT,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_PORT_NUM, u8, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(
+ UVERBS_ATTR_QUERY_PORT_RESP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex,
+ reserved),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
+ &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
+ &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
+
+const struct uapi_definition uverbs_def_obj_device[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
+ {},
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_dm(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm *dm = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ return dm->device->dealloc_dm(dm, attrs);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm_alloc_attr attr = {};
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)
+ ->obj_attr.uobject;
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_dm *dm;
+ int ret;
+
+ if (!ib_dev->alloc_dm)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_ALLOC_DM_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.alignment, attrs,
+ UVERBS_ATTR_ALLOC_DM_ALIGNMENT);
+ if (ret)
+ return ret;
+
+ dm = ib_dev->alloc_dm(ib_dev, attrs->context, &attr, attrs);
+ if (IS_ERR(dm))
+ return PTR_ERR(dm);
+
+ dm->device = ib_dev;
+ dm->length = attr.length;
+ dm->uobject = uobj;
+ atomic_set(&dm->usecnt, 0);
+
+ uobj->object = dm;
+
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_DM_FREE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
+
+const struct uapi_definition uverbs_def_obj_dm[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)),
+ {}
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_flow_action(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_flow_action *action = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ return action->device->destroy_flow_action(action);
+}
+
+static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
+ u32 flags, bool is_modify)
+{
+ u64 verbs_flags = flags;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
+
+ if (is_modify && uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
+
+ return verbs_flags;
+};
+
+static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
+{
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
+ &keymat->keymat.aes_gcm;
+
+ if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return -EOPNOTSUPP;
+
+ if (aes_gcm->key_len != 32 &&
+ aes_gcm->key_len != 24 &&
+ aes_gcm->key_len != 16)
+ return -EINVAL;
+
+ if (aes_gcm->icv_len != 16 &&
+ aes_gcm->icv_len != 8 &&
+ aes_gcm->icv_len != 12)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
+};
+
+static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* This is used in order to modify an esp flow action with an enabled
+ * replay protection to a disabled one. This is only supported via
+ * modify, as in create verb we can simply drop the REPLAY attribute and
+ * achieve the same thing.
+ */
+ return is_modify ? 0 : -EINVAL;
+}
+
+static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* Some replay protections could always be enabled without validating
+ * anything.
+ */
+ return 0;
+}
+
+static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
+};
+
+static int parse_esp_ip(enum ib_flow_spec_type proto,
+ const void __user *val_ptr,
+ size_t len, union ib_flow_spec *out)
+{
+ int ret;
+ const struct ib_uverbs_flow_ipv4_filter ipv4 = {
+ .src_ip = cpu_to_be32(0xffffffffUL),
+ .dst_ip = cpu_to_be32(0xffffffffUL),
+ .proto = 0xff,
+ .tos = 0xff,
+ .ttl = 0xff,
+ .flags = 0xff,
+ };
+ const struct ib_uverbs_flow_ipv6_filter ipv6 = {
+ .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .flow_label = cpu_to_be32(0xffffffffUL),
+ .next_hdr = 0xff,
+ .traffic_class = 0xff,
+ .hop_limit = 0xff,
+ };
+ union {
+ struct ib_uverbs_flow_ipv4_filter ipv4;
+ struct ib_uverbs_flow_ipv6_filter ipv6;
+ } user_val = {};
+ const void *user_pmask;
+ size_t val_len;
+
+ /* If the flow IPv4/IPv6 flow specifications are extended, the mask
+ * should be changed as well.
+ */
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
+ sizeof(ipv4.flags) != sizeof(ipv4));
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
+ sizeof(ipv6.reserved) != sizeof(ipv6));
+
+ switch (proto) {
+ case IB_FLOW_SPEC_IPV4:
+ if (len > sizeof(user_val.ipv4) &&
+ !ib_is_buffer_cleared((const u8 *)val_ptr + sizeof(user_val.ipv4),
+ len - sizeof(user_val.ipv4)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv4));
+ ret = copy_from_user(&user_val.ipv4, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv4;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (len > sizeof(user_val.ipv6) &&
+ !ib_is_buffer_cleared((const u8 *)val_ptr + sizeof(user_val.ipv6),
+ len - sizeof(user_val.ipv6)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv6));
+ ret = copy_from_user(&user_val.ipv6, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv6;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
+ &user_val,
+ val_len, out);
+}
+
+static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_flow_action_esp_encap uverbs_encap;
+ int ret;
+
+ ret = uverbs_copy_from(&uverbs_encap, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
+ if (ret)
+ return ret;
+
+ /* We currently support only one encap */
+ if (uverbs_encap.next_ptr)
+ return -EOPNOTSUPP;
+
+ if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
+ uverbs_encap.type != IB_FLOW_SPEC_IPV6)
+ return -EOPNOTSUPP;
+
+ return parse_esp_ip(uverbs_encap.type,
+ u64_to_user_ptr(uverbs_encap.val_ptr),
+ uverbs_encap.len,
+ &out->spec);
+}
+
+struct ib_flow_action_esp_attr {
+ struct ib_flow_action_attrs_esp hdr;
+ struct ib_flow_action_attrs_esp_keymats keymat;
+ struct ib_flow_action_attrs_esp_replays replay;
+ /* We currently support only one spec */
+ struct ib_flow_spec_list encap;
+};
+
+#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
+static int parse_flow_action_esp(struct ib_device *ib_dev,
+ struct uverbs_attr_bundle *attrs,
+ struct ib_flow_action_esp_attr *esp_attr,
+ bool is_modify)
+{
+ struct ib_uverbs_flow_action_esp uverbs_esp = {};
+ int ret;
+
+ /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+ ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ /* This can be called from FLOW_ACTION_ESP_MODIFY where
+ * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
+ */
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
+ ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
+ if (ret)
+ return ret;
+
+ if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
+ return -EOPNOTSUPP;
+
+ esp_attr->hdr.spi = uverbs_esp.spi;
+ esp_attr->hdr.seq = uverbs_esp.seq;
+ esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
+ esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
+ }
+ esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
+ is_modify);
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
+ esp_attr->keymat.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.keymat = &esp_attr->keymat;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
+ esp_attr->replay.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+
+ ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
+ is_modify);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.replay = &esp_attr->replay;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
+ ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.encap = &esp_attr->encap;
+ }
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
+ struct ib_device *ib_dev = attrs->context->device;
+ int ret;
+ struct ib_flow_action *action;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!ib_dev->create_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
+ if (ret)
+ return ret;
+
+ /* No need to check as this attribute is marked as MANDATORY */
+ action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr,
+ attrs);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, ib_dev,
+ IB_FLOW_ACTION_ESP);
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
+ struct ib_flow_action *action = uobj->object;
+ int ret;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!action->device->modify_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
+ if (ret)
+ return ret;
+
+ if (action->type != IB_FLOW_ACTION_ESP)
+ return -EINVAL;
+
+ return action->device->modify_flow_action_esp(action,
+ &esp_attr.hdr,
+ attrs);
+}
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
+ aes_key),
+ },
+};
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
+ size),
+ },
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+ hard_limit_pkts),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+ hard_limit_pkts),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
+const struct uapi_definition uverbs_def_obj_flow_action[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)),
+ {}
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return ib_dereg_mr_user((struct ib_mr *)uobject->object,
+ &attrs->driver_udata);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_ADVISE_MR_PD_HANDLE);
+ enum ib_uverbs_advise_mr_advice advice;
+ struct ib_device *ib_dev = pd->device;
+ const struct ib_sge *sg_list;
+ int num_sge;
+ u32 flags;
+ int ret;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->advise_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&advice, attrs, UVERBS_ATTR_ADVISE_MR_ADVICE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_ADVISE_MR_FLAGS,
+ IB_UVERBS_ADVISE_MR_FLAG_FLUSH);
+ if (ret)
+ return ret;
+
+ num_sge = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
+ if (num_sge < 0)
+ return num_sge;
+
+ sg_list = uverbs_attr_get_alloced_ptr(attrs,
+ UVERBS_ATTR_ADVISE_MR_SGE_LIST);
+ return ib_dev->advise_mr(pd, advice, flags, sg_list, num_sge,
+ attrs);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm_mr_attr attr = {};
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+ struct ib_dm *dm =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+ struct ib_device *ib_dev = pd->device;
+
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->reg_dm_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_REG_DM_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&attr.access_flags, attrs,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ IB_ACCESS_SUPPORTED);
+ if (ret)
+ return ret;
+
+ if (!(attr.access_flags & IB_ZERO_BASED))
+ return -EINVAL;
+
+ ret = ib_check_mr_access(attr.access_flags);
+ if (ret)
+ return ret;
+
+ if (attr.offset > dm->length || attr.length > dm->length ||
+ attr.length > dm->length - attr.offset)
+ return -EINVAL;
+
+ mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_DM;
+ mr->dm = dm;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&dm->usecnt);
+
+ uobj->object = mr;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ goto err_dereg;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ if (ret)
+ goto err_dereg;
+
+ return 0;
+
+err_dereg:
+ ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
+
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ADVISE_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_ADVISE_MR_ADVICE,
+ enum ib_uverbs_advise_mr_advice,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_ADVISE_MR_FLAGS,
+ enum ib_uverbs_advise_mr_flag,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ADVISE_MR_SGE_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_MR_REG,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ enum ib_access_flags),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MR_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_MR,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
+ &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
+
+const struct uapi_definition uverbs_def_obj_mr[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
+ UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)),
+ {}
+};
diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_uapi.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_uapi.c
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_uapi.c
@@ -0,0 +1,731 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ */
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <linux/bitops.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
+static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
+{
+ void *elm;
+ int rc;
+
+ if (key == UVERBS_API_KEY_ERR)
+ return ERR_PTR(-EOVERFLOW);
+
+ elm = kzalloc(alloc_size, GFP_KERNEL);
+ if (!elm)
+ return ERR_PTR(-ENOMEM);
+ rc = radix_tree_insert(&uapi->radix, key, elm);
+ if (rc) {
+ kfree(elm);
+ return ERR_PTR(rc);
+ }
+
+ return elm;
+}
+
+static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key,
+ size_t alloc_size, bool *exists)
+{
+ void *elm;
+
+ elm = uapi_add_elm(uapi, key, alloc_size);
+ if (!IS_ERR(elm)) {
+ *exists = false;
+ return elm;
+ }
+
+ if (elm != ERR_PTR(-EEXIST))
+ return elm;
+
+ elm = radix_tree_lookup(&uapi->radix, key);
+ if (WARN_ON(!elm))
+ return ERR_PTR(-EINVAL);
+ *exists = true;
+ return elm;
+}
+
+static int uapi_create_write(struct uverbs_api *uapi,
+ struct ib_device *ibdev,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 *cur_method_key)
+{
+ struct uverbs_api_write_method *method_elm;
+ u32 method_key = obj_key;
+ bool exists;
+
+ if (def->write.is_ex)
+ method_key |= uapi_key_write_ex_method(def->write.command_num);
+ else
+ method_key |= uapi_key_write_method(def->write.command_num);
+
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex)))
+ return -EINVAL;
+
+ method_elm->is_ex = def->write.is_ex;
+ method_elm->handler = def->func_write;
+ if (def->write.is_ex)
+ method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
+ BIT_ULL(def->write.command_num));
+ else
+ method_elm->disabled = !(ibdev->uverbs_cmd_mask &
+ BIT_ULL(def->write.command_num));
+
+ if (!def->write.is_ex && def->func_write) {
+ method_elm->has_udata = def->write.has_udata;
+ method_elm->has_resp = def->write.has_resp;
+ method_elm->req_size = def->write.req_size;
+ method_elm->resp_size = def->write.resp_size;
+ }
+
+ *cur_method_key = method_key;
+ return 0;
+}
+
+static int uapi_merge_method(struct uverbs_api *uapi,
+ struct uverbs_api_object *obj_elm, u32 obj_key,
+ const struct uverbs_method_def *method,
+ bool is_driver)
+{
+ u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
+ struct uverbs_api_ioctl_method *method_elm;
+ unsigned int i;
+ bool exists;
+
+ if (!method->attrs)
+ return 0;
+
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ if (exists) {
+ /*
+ * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
+ */
+ if (WARN_ON(method->handler))
+ return -EINVAL;
+ } else {
+ WARN_ON(!method->handler);
+ rcu_assign_pointer(method_elm->handler, method->handler);
+ if (method->handler != uverbs_destroy_def_handler)
+ method_elm->driver_method = is_driver;
+ }
+
+ for (i = 0; i != method->num_attrs; i++) {
+ const struct uverbs_attr_def *attr = (*method->attrs)[i];
+ struct uverbs_api_attr *attr_slot;
+
+ if (!attr)
+ continue;
+
+ /*
+ * ENUM_IN contains the 'ids' pointer to the driver's .rodata,
+ * so if it is specified by a driver then it always makes this
+ * into a driver method.
+ */
+ if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
+ method_elm->driver_method |= is_driver;
+
+ /*
+ * Like other uobject based things we only support a single
+ * uobject being NEW'd or DESTROY'd
+ */
+ if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+ u8 access = attr->attr.u2.objs_arr.access;
+
+ if (WARN_ON(access == UVERBS_ACCESS_NEW ||
+ access == UVERBS_ACCESS_DESTROY))
+ return -EINVAL;
+ }
+
+ attr_slot =
+ uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
+ sizeof(*attr_slot));
+ /* Attributes are not allowed to be modified by drivers */
+ if (IS_ERR(attr_slot))
+ return PTR_ERR(attr_slot);
+
+ attr_slot->spec = attr->attr;
+ }
+
+ return 0;
+}
+
+static int uapi_merge_obj_tree(struct uverbs_api *uapi,
+ const struct uverbs_object_def *obj,
+ bool is_driver)
+{
+ struct uverbs_api_object *obj_elm;
+ unsigned int i;
+ u32 obj_key;
+ bool exists;
+ int rc;
+
+ obj_key = uapi_key_obj(obj->id);
+ obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+
+ if (obj->type_attrs) {
+ if (WARN_ON(obj_elm->type_attrs))
+ return -EINVAL;
+
+ obj_elm->id = obj->id;
+ obj_elm->type_attrs = obj->type_attrs;
+ obj_elm->type_class = obj->type_attrs->type_class;
+ /*
+ * Today drivers are only permitted to use idr_class and
+ * fd_class types. We can revoke the IDR types during
+ * disassociation, and the FD types require the driver to use
+ * struct file_operations.owner to prevent the driver module
+ * code from unloading while the file is open. This provides
+ * enough safety that uverbs_uobject_fd_release() will
+ * continue to work. Drivers using FD are responsible to
+ * handle disassociation of the device on their own.
+ */
+ if (WARN_ON(is_driver &&
+ obj->type_attrs->type_class != &uverbs_idr_class &&
+ obj->type_attrs->type_class != &uverbs_fd_class))
+ return -EINVAL;
+ }
+
+ if (!obj->methods)
+ return 0;
+
+ for (i = 0; i != obj->num_methods; i++) {
+ const struct uverbs_method_def *method = (*obj->methods)[i];
+
+ if (!method)
+ continue;
+
+ rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+ is_driver);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int uapi_disable_elm(struct uverbs_api *uapi,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 method_key)
+{
+ bool exists;
+
+ if (def->scope == UAPI_SCOPE_OBJECT) {
+ struct uverbs_api_object *obj_elm;
+
+ obj_elm = uapi_add_get_elm(
+ uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ obj_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ uapi_key_is_ioctl_method(method_key)) {
+ struct uverbs_api_ioctl_method *method_elm;
+
+ method_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*method_elm), &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ method_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ (uapi_key_is_write_method(method_key) ||
+ uapi_key_is_write_ex_method(method_key))) {
+ struct uverbs_api_write_method *write_elm;
+
+ write_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*write_elm), &exists);
+ if (IS_ERR(write_elm))
+ return PTR_ERR(write_elm);
+ write_elm->disabled = 1;
+ return 0;
+ }
+
+ WARN_ON(true);
+ return -EINVAL;
+}
+
+static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev,
+ const struct uapi_definition *def_list,
+ bool is_driver)
+{
+ const struct uapi_definition *def = def_list;
+ u32 cur_obj_key = UVERBS_API_KEY_ERR;
+ u32 cur_method_key = UVERBS_API_KEY_ERR;
+ bool exists;
+ int rc;
+
+ if (!def_list)
+ return 0;
+
+ for (;; def++) {
+ switch ((enum uapi_definition_kind)def->kind) {
+ case UAPI_DEF_CHAIN:
+ rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_CHAIN_OBJ_TREE:
+ if (WARN_ON(def->object_start.object_id !=
+ def->chain_obj_tree->id))
+ return -EINVAL;
+
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree,
+ is_driver);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_END:
+ return 0;
+
+ case UAPI_DEF_IS_SUPPORTED_DEV_FN: {
+ void **ibdev_fn =
+ (void *)((u8 *)ibdev + def->needs_fn_offset);
+
+ if (*ibdev_fn)
+ continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
+
+ case UAPI_DEF_IS_SUPPORTED_FUNC:
+ if (def->func_is_supported(ibdev))
+ continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_OBJECT_START: {
+ struct uverbs_api_object *obj_elm;
+
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ obj_elm = uapi_add_get_elm(uapi, cur_obj_key,
+ sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ continue;
+ }
+
+ case UAPI_DEF_WRITE:
+ rc = uapi_create_write(
+ uapi, ibdev, def, cur_obj_key, &cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
+ WARN_ON(true);
+ return -EINVAL;
+ }
+}
+
+static int
+uapi_finalize_ioctl_method(struct uverbs_api *uapi,
+ struct uverbs_api_ioctl_method *method_elm,
+ u32 method_key)
+{
+ struct radix_tree_iter iter;
+ unsigned int num_attrs = 0;
+ unsigned int max_bkey = 0;
+ bool single_uobj = false;
+ void __rcu **slot;
+
+ method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN;
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter,
+ uapi_key_attrs_start(method_key)) {
+ struct uverbs_api_attr *elm =
+ rcu_dereference_protected(*slot, true);
+ u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK;
+ u32 attr_bkey = uapi_bkey_attr(attr_key);
+ u8 type = elm->spec.type;
+
+ if (uapi_key_attr_to_ioctl_method(iter.index) !=
+ uapi_key_attr_to_ioctl_method(method_key))
+ break;
+
+ if (elm->spec.mandatory)
+ __set_bit(attr_bkey, method_elm->attr_mandatory);
+
+ if (elm->spec.is_udata)
+ method_elm->has_udata = true;
+
+ if (type == UVERBS_ATTR_TYPE_IDR ||
+ type == UVERBS_ATTR_TYPE_FD) {
+ u8 access = elm->spec.u.obj.access;
+
+ /*
+ * Verbs specs may only have one NEW/DESTROY, we don't
+ * have the infrastructure to abort multiple NEW's or
+ * cope with multiple DESTROY failure.
+ */
+ if (access == UVERBS_ACCESS_NEW ||
+ access == UVERBS_ACCESS_DESTROY) {
+ if (WARN_ON(single_uobj))
+ return -EINVAL;
+
+ single_uobj = true;
+ if (WARN_ON(!elm->spec.mandatory))
+ return -EINVAL;
+ }
+
+ if (access == UVERBS_ACCESS_DESTROY)
+ method_elm->destroy_bkey = attr_bkey;
+ }
+
+ max_bkey = max(max_bkey, attr_bkey);
+ num_attrs++;
+ }
+
+ method_elm->key_bitmap_len = max_bkey + 1;
+ WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN);
+
+ uapi_compute_bundle_size(method_elm, num_attrs);
+ return 0;
+}
+
+static int uapi_finalize(struct uverbs_api *uapi)
+{
+ const struct uverbs_api_write_method **data;
+ unsigned long max_write_ex = 0;
+ unsigned long max_write = 0;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ int rc;
+ int i;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ rc = uapi_finalize_ioctl_method(uapi, method_elm,
+ iter.index);
+ if (rc)
+ return rc;
+ }
+
+ if (uapi_key_is_write_method(iter.index))
+ max_write = max(max_write,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ if (uapi_key_is_write_ex_method(iter.index))
+ max_write_ex =
+ max(max_write_ex,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ }
+
+ uapi->notsupp_method.handler = ib_uverbs_notsupp;
+ uapi->num_write = max_write + 1;
+ uapi->num_write_ex = max_write_ex + 1;
+ data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
+ sizeof(*uapi->write_methods), GFP_KERNEL);
+ for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
+ data[i] = &uapi->notsupp_method;
+ uapi->write_methods = data;
+ uapi->write_ex_methods = data + uapi->num_write;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_write_method(iter.index))
+ uapi->write_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
+ if (uapi_key_is_write_ex_method(iter.index))
+ uapi->write_ex_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
+ }
+
+ return 0;
+}
+
+static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) {
+ if (iter.index > last)
+ return;
+ kfree(rcu_dereference_protected(*slot, true));
+ radix_tree_iter_delete(&uapi->radix, &iter, slot);
+ }
+}
+
+static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key)
+{
+ uapi_remove_range(uapi, obj_key,
+ obj_key | UVERBS_API_METHOD_KEY_MASK |
+ UVERBS_API_ATTR_KEY_MASK);
+}
+
+static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key)
+{
+ uapi_remove_range(uapi, method_key,
+ method_key | UVERBS_API_ATTR_KEY_MASK);
+}
+
+
+static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec)
+{
+ if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+ spec->type == UVERBS_ATTR_TYPE_FD)
+ return spec->u.obj.obj_type;
+ if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY)
+ return spec->u2.objs_arr.obj_type;
+ return UVERBS_API_KEY_ERR;
+}
+
+static void uapi_key_okay(u32 key)
+{
+ unsigned int count = 0;
+
+ if (uapi_key_is_object(key))
+ count++;
+ if (uapi_key_is_ioctl_method(key))
+ count++;
+ if (uapi_key_is_write_method(key))
+ count++;
+ if (uapi_key_is_write_ex_method(key))
+ count++;
+ if (uapi_key_is_attr(key))
+ count++;
+ WARN(count != 1, "Bad count %d key=%x", count, key);
+}
+
+static void uapi_finalize_disable(struct uverbs_api *uapi)
+{
+ struct radix_tree_iter iter;
+ u32 starting_key = 0;
+ bool scan_again = false;
+ void __rcu **slot;
+
+again:
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) {
+ uapi_key_okay(iter.index);
+
+ if (uapi_key_is_object(iter.index)) {
+ struct uverbs_api_object *obj_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (obj_elm->disabled) {
+ /* Have to check all the attrs again */
+ scan_again = true;
+ starting_key = iter.index;
+ uapi_remove_object(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ starting_key = iter.index;
+ uapi_remove_method(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_write_method(iter.index) ||
+ uapi_key_is_write_ex_method(iter.index)) {
+ struct uverbs_api_write_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ kfree(method_elm);
+ radix_tree_iter_delete(&uapi->radix, &iter, slot);
+ }
+ continue;
+ }
+
+ if (uapi_key_is_attr(iter.index)) {
+ struct uverbs_api_attr *attr_elm =
+ rcu_dereference_protected(*slot, true);
+ const struct uverbs_api_object *tmp_obj;
+ u32 obj_key;
+
+ /*
+ * If the method has a mandatory object handle
+ * attribute which relies on an object which is not
+ * present then the entire method is uncallable.
+ */
+ if (!attr_elm->spec.mandatory)
+ continue;
+ obj_key = uapi_get_obj_id(&attr_elm->spec);
+ if (obj_key == UVERBS_API_KEY_ERR)
+ continue;
+ tmp_obj = uapi_get_object(uapi, obj_key);
+ if (IS_ERR(tmp_obj)) {
+ if (PTR_ERR(tmp_obj) == -ENOMSG)
+ continue;
+ } else {
+ if (!tmp_obj->disabled)
+ continue;
+ }
+
+ starting_key = iter.index;
+ uapi_remove_method(
+ uapi,
+ iter.index & (UVERBS_API_OBJ_KEY_MASK |
+ UVERBS_API_METHOD_KEY_MASK));
+ goto again;
+ }
+
+ WARN_ON(false);
+ }
+
+ if (!scan_again)
+ return;
+ scan_again = false;
+ starting_key = 0;
+ goto again;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+ if (!uapi)
+ return;
+
+ uapi_remove_range(uapi, 0, U32_MAX);
+ kfree(uapi->write_methods);
+ kfree(uapi);
+}
+
+static const struct uapi_definition uverbs_core_api[] = {
+ UAPI_DEF_CHAIN(uverbs_def_obj_async_fd),
+ UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+ UAPI_DEF_CHAIN(uverbs_def_obj_cq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_device),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+ UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
+ UAPI_DEF_CHAIN(uverbs_def_obj_intf),
+ UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_write_intf),
+ {},
+};
+
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
+{
+ struct uverbs_api *uapi;
+ int rc;
+
+ uapi = kzalloc(sizeof(*uapi), GFP_KERNEL);
+ if (!uapi)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
+ uapi->driver_id = ibdev->ops.driver_id;
+
+ rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
+ if (rc)
+ goto err;
+ rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true);
+ if (rc)
+ goto err;
+
+ uapi_finalize_disable(uapi);
+ rc = uapi_finalize(uapi);
+ if (rc)
+ goto err;
+
+ return uapi;
+err:
+ if (rc != -ENOMEM)
+ dev_err(&ibdev->dev,
+ "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+ rc);
+
+ uverbs_destroy_api(uapi);
+ return ERR_PTR(rc);
+}
+
+/*
+ * The pre version is done before destroying the HW objects, it only blocks
+ * off method access. All methods that require the ib_dev or the module data
+ * must test one of these assignments prior to continuing.
+ */
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev)
+{
+ struct uverbs_api *uapi = uverbs_dev->uapi;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->driver_method)
+ rcu_assign_pointer(method_elm->handler, NULL);
+ }
+ }
+
+ synchronize_srcu(&uverbs_dev->disassociate_srcu);
+}
+
+/*
+ * Called when a driver disassociates from the ib_uverbs_device. The
+ * assumption is that the driver module will unload after. Replace everything
+ * related to the driver with NULL as a safety measure.
+ */
+void uverbs_disassociate_api(struct uverbs_api *uapi)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_object(iter.index)) {
+ struct uverbs_api_object *object_elm =
+ rcu_dereference_protected(*slot, true);
+
+ /*
+ * Some type_attrs are in the driver module. We don't
+ * bother to keep track of which since there should be
+ * no use of this after disassociate.
+ */
+ object_elm->type_attrs = NULL;
+ } else if (uapi_key_is_attr(iter.index)) {
+ struct uverbs_api_attr *elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN)
+ elm->spec.u2.enum_def.ids = NULL;
+ }
+ }
+}
diff --git a/sys/ofed/drivers/infiniband/core/ib_verbs.c b/sys/ofed/drivers/infiniband/core/ib_verbs.c
--- a/sys/ofed/drivers/infiniband/core/ib_verbs.c
+++ b/sys/ofed/drivers/infiniband/core/ib_verbs.c
@@ -47,6 +47,7 @@
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -267,10 +268,11 @@
{
struct ib_pd *pd;
int mr_access_flags = 0;
+ int ret;
- pd = device->alloc_pd(device, NULL, NULL);
- if (IS_ERR(pd))
- return pd;
+ pd = rdma_zalloc_drv_obj(device, ib_pd);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
pd->device = device;
pd->uobject = NULL;
@@ -278,6 +280,12 @@
atomic_set(&pd->usecnt, 0);
pd->flags = flags;
+ ret = device->alloc_pd(pd, NULL);
+ if (ret) {
+ kfree(pd);
+ return ERR_PTR(ret);
+ }
+
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
@@ -299,6 +307,7 @@
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_DMA;
mr->uobject = NULL;
mr->need_inval = false;
@@ -316,19 +325,20 @@
EXPORT_SYMBOL(__ib_alloc_pd);
/**
- * ib_dealloc_pd - Deallocates a protection domain.
+ * ib_dealloc_pd_user - Deallocates a protection domain.
* @pd: The protection domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
*
* It is an error to call this function while any resources in the pd still
* exist. The caller is responsible to synchronously destroy them and
* guarantee no new allocations will happen.
*/
-void ib_dealloc_pd(struct ib_pd *pd)
+void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
{
int ret;
if (pd->__internal_mr) {
- ret = pd->device->dereg_mr(pd->__internal_mr);
+ ret = pd->device->dereg_mr(pd->__internal_mr, NULL);
WARN_ON(ret);
pd->__internal_mr = NULL;
}
@@ -337,32 +347,98 @@
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
- /* Making delalloc_pd a void return is a WIP, no driver should return
- an error here. */
- ret = pd->device->dealloc_pd(pd);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
+ pd->device->dealloc_pd(pd, udata);
+ kfree(pd);
}
-EXPORT_SYMBOL(ib_dealloc_pd);
+EXPORT_SYMBOL(ib_dealloc_pd_user);
/* Address handles */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+static struct ib_ah *_ib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ u32 flags,
+ struct ib_udata *udata)
{
+ struct ib_device *device = pd->device;
struct ib_ah *ah;
+ int ret;
- ah = pd->device->create_ah(pd, ah_attr, NULL);
+ might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
- if (!IS_ERR(ah)) {
- ah->device = pd->device;
- ah->pd = pd;
- ah->uobject = NULL;
- atomic_inc(&pd->usecnt);
+ if (!device->create_ah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ah = rdma_zalloc_drv_obj_gfp(
+ device, ib_ah,
+ (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ ah->device = device;
+ ah->pd = pd;
+
+ ret = device->create_ah(ah, ah_attr, flags, udata);
+ if (ret) {
+ kfree(ah);
+ return ERR_PTR(ret);
}
+ atomic_inc(&pd->usecnt);
+ return ah;
+}
+
+/**
+ * rdma_create_ah - Creates an address handle for the
+ * given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
+ *
+ * It returns 0 on success and returns appropriate error code on error.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ u32 flags)
+{
+ struct ib_ah *ah;
+
+ ah = _ib_create_ah(pd, ah_attr, flags, NULL);
+
return ah;
}
EXPORT_SYMBOL(ib_create_ah);
+/**
+ * ib_create_user_ah - Creates an address handle for the
+ * given address vector.
+ * It resolves destination mac address for ah attribute of RoCE type.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @udata: pointer to user's input output buffer information need by
+ * provider driver.
+ *
+ * It returns a valid address handle pointer on success and
+ * returns appropriate error code on error.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_user_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+{
+ int err;
+
+ if (rdma_protocol_roce(pd->device, ah_attr->port_num)) {
+ err = ib_resolve_eth_dmac(pd->device, ah_attr);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return _ib_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
+}
+EXPORT_SYMBOL(ib_create_user_ah);
+
static int ib_get_header_version(const union rdma_network_hdr *hdr)
{
const struct ip *ip4h = (const struct ip *)&hdr->roce4grh;
@@ -589,7 +665,7 @@
if (ret)
return ERR_PTR(ret);
- return ib_create_ah(pd, &ah_attr);
+ return ib_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
@@ -609,19 +685,20 @@
}
EXPORT_SYMBOL(ib_query_ah);
-int ib_destroy_ah(struct ib_ah *ah)
+int ib_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
{
struct ib_pd *pd;
- int ret;
+
+ might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
pd = ah->pd;
- ret = ah->device->destroy_ah(ah);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ ah->device->destroy_ah(ah, flags);
+ atomic_dec(&pd->usecnt);
- return ret;
+ kfree(ah);
+ return 0;
}
-EXPORT_SYMBOL(ib_destroy_ah);
+EXPORT_SYMBOL(ib_destroy_ah_user);
/* Shared receive queues */
@@ -629,27 +706,40 @@
struct ib_srq_init_attr *srq_init_attr)
{
struct ib_srq *srq;
+ int ret;
if (!pd->device->create_srq)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
- srq = pd->device->create_srq(pd, srq_init_attr, NULL);
-
- if (!IS_ERR(srq)) {
- srq->device = pd->device;
- srq->pd = pd;
- srq->uobject = NULL;
- srq->event_handler = srq_init_attr->event_handler;
- srq->srq_context = srq_init_attr->srq_context;
- srq->srq_type = srq_init_attr->srq_type;
- if (srq->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
- srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
- atomic_inc(&srq->ext.xrc.xrcd->usecnt);
- atomic_inc(&srq->ext.xrc.cq->usecnt);
- }
- atomic_inc(&pd->usecnt);
- atomic_set(&srq->usecnt, 0);
+ srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->event_handler = srq_init_attr->event_handler;
+ srq->srq_context = srq_init_attr->srq_context;
+ srq->srq_type = srq_init_attr->srq_type;
+
+ if (ib_srq_has_cq(srq->srq_type)) {
+ srq->ext.cq = srq_init_attr->ext.cq;
+ atomic_inc(&srq->ext.cq->usecnt);
+ }
+ if (srq->srq_type == IB_SRQT_XRC) {
+ srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
+ atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+ }
+ atomic_inc(&pd->usecnt);
+
+ ret = pd->device->create_srq(srq, srq_init_attr, NULL);
+ if (ret) {
+ atomic_dec(&srq->pd->usecnt);
+ if (srq->srq_type == IB_SRQT_XRC)
+ atomic_dec(&srq->ext.xrc.xrcd->usecnt);
+ if (ib_srq_has_cq(srq->srq_type))
+ atomic_dec(&srq->ext.cq->usecnt);
+ kfree(srq);
+ return ERR_PTR(ret);
}
return srq;
@@ -674,36 +764,23 @@
}
EXPORT_SYMBOL(ib_query_srq);
-int ib_destroy_srq(struct ib_srq *srq)
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
{
- struct ib_pd *pd;
- enum ib_srq_type srq_type;
- struct ib_xrcd *uninitialized_var(xrcd);
- struct ib_cq *uninitialized_var(cq);
- int ret;
-
if (atomic_read(&srq->usecnt))
return -EBUSY;
- pd = srq->pd;
- srq_type = srq->srq_type;
- if (srq_type == IB_SRQT_XRC) {
- xrcd = srq->ext.xrc.xrcd;
- cq = srq->ext.xrc.cq;
- }
+ srq->device->destroy_srq(srq, udata);
- ret = srq->device->destroy_srq(srq);
- if (!ret) {
- atomic_dec(&pd->usecnt);
- if (srq_type == IB_SRQT_XRC) {
- atomic_dec(&xrcd->usecnt);
- atomic_dec(&cq->usecnt);
- }
- }
+ atomic_dec(&srq->pd->usecnt);
+ if (srq->srq_type == IB_SRQT_XRC)
+ atomic_dec(&srq->ext.xrc.xrcd->usecnt);
+ if (ib_srq_has_cq(srq->srq_type))
+ atomic_dec(&srq->ext.cq->usecnt);
+ kfree(srq);
- return ret;
+ return 0;
}
-EXPORT_SYMBOL(ib_destroy_srq);
+EXPORT_SYMBOL(ib_destroy_srq_user);
/* Queue pairs */
@@ -793,7 +870,7 @@
if (!IS_ERR(qp))
__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
else
- real_qp->device->destroy_qp(real_qp);
+ real_qp->device->destroy_qp(real_qp, NULL);
return qp;
}
@@ -809,7 +886,7 @@
qp_init_attr->cap.max_recv_sge))
return ERR_PTR(-EINVAL);
- qp = device->create_qp(pd, qp_init_attr, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
if (IS_ERR(qp))
return qp;
@@ -1243,6 +1320,95 @@
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
+
+/**
+ * IB core internal function to perform QP attributes modification.
+ */
+static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ int ret;
+
+ if (port < rdma_start_port(qp->device) ||
+ port > rdma_end_port(qp->device))
+ return -EINVAL;
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ /*
+ * Today the core code can only handle alternate paths and APM
+ * for IB. Ban them in roce mode.
+ */
+ if (!(rdma_protocol_ib(qp->device,
+ attr->alt_ah_attr.port_num) &&
+ rdma_protocol_ib(qp->device, port))) {
+ ret = EINVAL;
+ goto out;
+ }
+ }
+
+ /*
+ * If the user provided the qp_attr then we have to resolve it. Kernel
+ * users have to provide already resolved rdma_ah_attr's
+ */
+ if (udata && (attr_mask & IB_QP_AV) &&
+ rdma_protocol_roce(qp->device, port) &&
+ is_qp_type_connected(qp)) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+ if (ret)
+ goto out;
+ }
+
+ if (rdma_ib_or_roce(qp->device, port)) {
+ if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
+ dev_warn(&qp->device->dev,
+ "%s rq_psn overflow, masking to 24 bits\n",
+ __func__);
+ attr->rq_psn &= 0xffffff;
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
+ dev_warn(&qp->device->dev,
+ " %s sq_psn overflow, masking to 24 bits\n",
+ __func__);
+ attr->sq_psn &= 0xffffff;
+ }
+ }
+
+ ret = qp->device->modify_qp(qp, attr, attr_mask, udata);
+ if (ret)
+ goto out;
+
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+out:
+ return ret;
+}
+
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @ib_qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify. On output,
+ * the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ * are being modified.
+ * @udata: pointer to user's input output buffer information
+ * are being modified.
+ * It returns 0 on success and returns appropriate error code on error.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
+}
+EXPORT_SYMBOL(ib_modify_qp_with_udata);
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
@@ -1319,7 +1485,7 @@
return 0;
}
-int ib_destroy_qp(struct ib_qp *qp)
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
{
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
@@ -1339,7 +1505,7 @@
srq = qp->srq;
ind_tbl = qp->rwq_ind_tbl;
- ret = qp->device->destroy_qp(qp);
+ ret = qp->device->destroy_qp(qp, udata);
if (!ret) {
if (pd)
atomic_dec(&pd->usecnt);
@@ -1355,32 +1521,40 @@
return ret;
}
-EXPORT_SYMBOL(ib_destroy_qp);
+EXPORT_SYMBOL(ib_destroy_qp_user);
/* Completion queues */
-struct ib_cq *ib_create_cq(struct ib_device *device,
- ib_comp_handler comp_handler,
- void (*event_handler)(struct ib_event *, void *),
- void *cq_context,
- const struct ib_cq_init_attr *cq_attr)
+struct ib_cq *__ib_create_cq(struct ib_device *device,
+ ib_comp_handler comp_handler,
+ void (*event_handler)(struct ib_event *, void *),
+ void *cq_context,
+ const struct ib_cq_init_attr *cq_attr,
+ const char *caller)
{
struct ib_cq *cq;
+ int ret;
- cq = device->create_cq(device, cq_attr, NULL, NULL);
+ cq = rdma_zalloc_drv_obj(device, ib_cq);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ cq->device = device;
+ cq->uobject = NULL;
+ cq->comp_handler = comp_handler;
+ cq->event_handler = event_handler;
+ cq->cq_context = cq_context;
+ atomic_set(&cq->usecnt, 0);
- if (!IS_ERR(cq)) {
- cq->device = device;
- cq->uobject = NULL;
- cq->comp_handler = comp_handler;
- cq->event_handler = event_handler;
- cq->cq_context = cq_context;
- atomic_set(&cq->usecnt, 0);
+ ret = device->create_cq(cq, cq_attr, NULL);
+ if (ret) {
+ kfree(cq);
+ return ERR_PTR(ret);
}
return cq;
}
-EXPORT_SYMBOL(ib_create_cq);
+EXPORT_SYMBOL(__ib_create_cq);
int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
@@ -1389,14 +1563,16 @@
}
EXPORT_SYMBOL(ib_modify_cq);
-int ib_destroy_cq(struct ib_cq *cq)
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
if (atomic_read(&cq->usecnt))
return -EBUSY;
- return cq->device->destroy_cq(cq);
+ cq->device->destroy_cq(cq, udata);
+ kfree(cq);
+ return 0;
}
-EXPORT_SYMBOL(ib_destroy_cq);
+EXPORT_SYMBOL(ib_destroy_cq_user);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
@@ -1407,24 +1583,31 @@
/* Memory regions */
-int ib_dereg_mr(struct ib_mr *mr)
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;
+ struct ib_dm *dm = mr->dm;
+ struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
int ret;
- ret = mr->device->dereg_mr(mr);
- if (!ret)
+ ret = mr->device->dereg_mr(mr, udata);
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (dm)
+ atomic_dec(&dm->usecnt);
+ kfree(sig_attrs);
+ }
return ret;
}
-EXPORT_SYMBOL(ib_dereg_mr);
+EXPORT_SYMBOL(ib_dereg_mr_user);
/**
- * ib_alloc_mr() - Allocates a memory region
+ * ib_alloc_mr_user() - Allocates a memory region
* @pd: protection domain associated with the region
* @mr_type: memory region type
* @max_num_sg: maximum sg entries available for registration.
+ * @udata: user data or null for kernel objects
*
* Notes:
* Memory registeration page/sg lists must not exceed max_num_sg.
@@ -1432,27 +1615,38 @@
* max_num_sg * used_page_size.
*
*/
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
+struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata)
{
struct ib_mr *mr;
- if (!pd->device->alloc_mr)
- return ERR_PTR(-ENOSYS);
+ if (!pd->device->alloc_mr) {
+ mr = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
+
+ if (mr_type == IB_MR_TYPE_INTEGRITY) {
+ WARN_ON_ONCE(1);
+ mr = ERR_PTR(-EINVAL);
+ goto out;
+ }
- mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
+ mr = pd->device->alloc_mr(pd, mr_type, max_num_sg, udata);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
+ mr->dm = NULL;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
+ mr->type = mr_type;
+ mr->sig_attrs = NULL;
}
+out:
return mr;
}
-EXPORT_SYMBOL(ib_alloc_mr);
+EXPORT_SYMBOL(ib_alloc_mr_user);
/* "Fast" memory regions */
@@ -1578,14 +1772,14 @@
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
{
struct ib_xrcd *xrcd;
if (!device->alloc_xrcd)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
- xrcd = device->alloc_xrcd(device, NULL, NULL);
+ xrcd = device->alloc_xrcd(device, NULL);
if (!IS_ERR(xrcd)) {
xrcd->device = device;
xrcd->inode = NULL;
@@ -1596,9 +1790,9 @@
return xrcd;
}
-EXPORT_SYMBOL(ib_alloc_xrcd);
+EXPORT_SYMBOL(__ib_alloc_xrcd);
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct ib_qp *qp;
int ret;
@@ -1612,8 +1806,9 @@
if (ret)
return ret;
}
+ mutex_destroy(&xrcd->tgt_qp_mutex);
- return xrcd->device->dealloc_xrcd(xrcd);
+ return xrcd->device->dealloc_xrcd(xrcd, udata);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
@@ -1657,24 +1852,23 @@
EXPORT_SYMBOL(ib_create_wq);
/**
- * ib_destroy_wq - Destroys the specified WQ.
+ * ib_destroy_wq - Destroys the specified user WQ.
* @wq: The WQ to destroy.
+ * @udata: Valid user data
*/
-int ib_destroy_wq(struct ib_wq *wq)
+int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
- int err;
struct ib_cq *cq = wq->cq;
struct ib_pd *pd = wq->pd;
if (atomic_read(&wq->usecnt))
return -EBUSY;
- err = wq->device->destroy_wq(wq);
- if (!err) {
- atomic_dec(&pd->usecnt);
- atomic_dec(&cq->usecnt);
- }
- return err;
+ wq->device->destroy_wq(wq, udata);
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&cq->usecnt);
+
+ return 0;
}
EXPORT_SYMBOL(ib_destroy_wq);
@@ -1761,33 +1955,6 @@
}
EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain)
-{
- struct ib_flow *flow_id;
- if (!qp->device->create_flow)
- return ERR_PTR(-ENOSYS);
-
- flow_id = qp->device->create_flow(qp, flow_attr, domain);
- if (!IS_ERR(flow_id))
- atomic_inc(&qp->usecnt);
- return flow_id;
-}
-EXPORT_SYMBOL(ib_create_flow);
-
-int ib_destroy_flow(struct ib_flow *flow_id)
-{
- int err;
- struct ib_qp *qp = flow_id->qp;
-
- err = qp->device->destroy_flow(flow_id);
- if (!err)
- atomic_dec(&qp->usecnt);
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_flow);
-
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
diff --git a/sys/ofed/drivers/infiniband/core/rdma_core.h b/sys/ofed/drivers/infiniband/core/rdma_core.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/rdma_core.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_CORE_H
+#define RDMA_CORE_H
+
+#include <linux/idr.h>
+#include <linux/radix-tree.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mutex.h>
+
+struct ib_uverbs_device;
+
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason);
+
+int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);
+
+/*
+ * Get an ib_uobject that corresponds to the given id from ufile, assuming
+ * the object is from the given type. Lock it to the required access when
+ * applicable.
+ * This function could create (access == NEW), destroy (access == DESTROY)
+ * or unlock (access == READ || access == WRITE) objects if required.
+ * The action will be finalized only when uverbs_finalize_object or
+ * uverbs_finalize_objects are called.
+ */
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
+ s64 id, struct uverbs_attr_bundle *attrs);
+
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool commit,
+ struct uverbs_attr_bundle *attrs);
+
+int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);
+
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+
+struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs);
+
+/*
+ * This is the runtime description of the uverbs API, used by the syscall
+ * machinery to validate and dispatch calls.
+ */
+
+/*
+ * Depending on ID the slot pointer in the radix tree points at one of these
+ * structs.
+ */
+
+struct uverbs_api_ioctl_method {
+ int(__rcu *handler)(struct uverbs_attr_bundle *attrs);
+ DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
+ u16 bundle_size;
+ u8 use_stack:1;
+ u8 driver_method:1;
+ u8 disabled:1;
+ u8 has_udata:1;
+ u8 key_bitmap_len;
+ u8 destroy_bkey;
+};
+
+struct uverbs_api_write_method {
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+ u8 disabled:1;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+};
+
+struct uverbs_api_attr {
+ struct uverbs_attr_spec spec;
+};
+
+struct uverbs_api {
+ /* radix tree contains struct uverbs_api_* pointers */
+ struct radix_tree_root radix;
+ enum rdma_driver_id driver_id;
+
+ unsigned int num_write;
+ unsigned int num_write_ex;
+ struct uverbs_api_write_method notsupp_method;
+ const struct uverbs_api_write_method **write_methods;
+ const struct uverbs_api_write_method **write_ex_methods;
+};
+
+/*
+ * Get an uverbs_api_object that corresponds to the given object_id.
+ * Note:
+ * -ENOMSG means that any object is allowed to match during lookup.
+ */
+static inline const struct uverbs_api_object *
+uapi_get_object(struct uverbs_api *uapi, u16 object_id)
+{
+ const struct uverbs_api_object *res;
+
+ if (object_id == UVERBS_IDR_ANY_OBJECT)
+ return ERR_PTR(-ENOMSG);
+
+ res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ if (!res)
+ return ERR_PTR(-ENOENT);
+
+ return res;
+}
+
+char *uapi_key_format(char *S, unsigned int key);
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev);
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
+void uverbs_disassociate_api(struct uverbs_api *uapi);
+void uverbs_destroy_api(struct uverbs_api *uapi);
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+ unsigned int num_attrs);
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
+
+extern const struct uapi_definition uverbs_def_obj_async_fd[];
+extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_cq[];
+extern const struct uapi_definition uverbs_def_obj_device[];
+extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_flow_action[];
+extern const struct uapi_definition uverbs_def_obj_intf[];
+extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_write_intf[];
+
+static inline const struct uverbs_api_write_method *
+uapi_get_method(const struct uverbs_api *uapi, u32 command)
+{
+ u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return ERR_PTR(-EINVAL);
+
+ if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ if (cmd_idx >= uapi->num_write_ex)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_ex_methods[cmd_idx];
+ }
+
+ if (cmd_idx >= uapi->num_write)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_methods[cmd_idx];
+}
+
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out);
+
+#endif /* RDMA_CORE_H */
diff --git a/sys/ofed/drivers/infiniband/core/uverbs.h b/sys/ofed/drivers/infiniband/core/uverbs.h
--- a/sys/ofed/drivers/infiniband/core/uverbs.h
+++ b/sys/ofed/drivers/infiniband/core/uverbs.h
@@ -49,10 +49,15 @@
#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/rbtree.h>
+#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_std_types.h>
+
+#define UVERBS_MODULE_NAME ib_uverbs
+#include <rdma/uverbs_named_ioctl.h>
static inline void
ib_uverbs_init_udata(struct ib_udata *udata,
@@ -90,53 +95,76 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
- * struct ib_uverbs_event_file: One reference is held by the VFS and
- * released when the file is closed. For asynchronous event files,
- * another reference is held by the corresponding main context file
- * and released when that file is closed. For completion event files,
- * a reference is taken when a CQ is created that uses the file, and
- * released when the CQ is destroyed.
+ * struct ib_uverbs_event_queue: Base structure for
+ * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
+ * One reference is held by the VFS and released when the file is closed.
+ * For asynchronous event files, another reference is held by the corresponding
+ * main context file and released when that file is closed. For completion
+ * event files, a reference is taken when a CQ is created that uses the file,
+ * and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
atomic_t refcount;
- int num_comp_vectors;
+ u32 num_comp_vectors;
struct completion comp;
- struct device *dev;
+ struct device dev;
struct ib_device __rcu *ib_dev;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
- struct kobject kobj;
struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
- struct list_head uverbs_events_file_list;
+ struct uverbs_api *uapi;
};
-struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
- struct list_head list;
+};
+
+struct ib_uverbs_async_event_file {
+ struct ib_uobject uobj;
+ struct ib_uverbs_event_queue ev_queue;
+ struct ib_event_handler event_handler;
+};
+
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject uobj;
+ struct ib_uverbs_event_queue ev_queue;
};
struct ib_uverbs_file {
struct kref ref;
- struct mutex mutex;
- struct mutex cleanup_mutex; /* protect cleanup */
struct ib_uverbs_device *device;
+ struct mutex ucontext_lock;
+ /*
+ * ucontext must be accessed via ib_uverbs_get_ucontext() or with
+ * ucontext_lock held
+ */
struct ib_ucontext *ucontext;
- struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
struct list_head list;
- int is_closed;
+
+ /*
+ * To access the uobjects list hw_destroy_rwsem must be held for write
+ * OR hw_destroy_rwsem held for read AND uobjects_lock held.
+ * hw_destroy_rwsem should be called across any destruction of the HW
+ * object of an associated uobject.
+ */
+ struct rw_semaphore hw_destroy_rwsem;
+ spinlock_t uobjects_lock;
+ struct list_head uobjects;
+
+ struct mutex umap_lock;
+ struct list_head umaps;
+
+ struct xarray idr;
};
struct ib_uverbs_event {
@@ -157,6 +185,7 @@
struct ib_uevent_object {
struct ib_uobject uobject;
+ /* List member for ib_uverbs_async_event_file list */
struct list_head event_list;
u32 events_reported;
};
@@ -184,51 +213,40 @@
};
struct ib_ucq_object {
- struct ib_uobject uobject;
- struct ib_uverbs_file *uverbs_file;
+ struct ib_uevent_object uevent;
struct list_head comp_list;
- struct list_head async_list;
u32 comp_events_reported;
- u32 async_events_reported;
};
-extern spinlock_t ib_uverbs_idr_lock;
-extern struct idr ib_uverbs_pd_idr;
-extern struct idr ib_uverbs_mr_idr;
-extern struct idr ib_uverbs_mw_idr;
-extern struct idr ib_uverbs_ah_idr;
-extern struct idr ib_uverbs_cq_idr;
-extern struct idr ib_uverbs_qp_idr;
-extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrcd_idr;
-extern struct idr ib_uverbs_rule_idr;
-extern struct idr ib_uverbs_wq_idr;
-extern struct idr ib_uverbs_rwq_ind_tbl_idr;
-
-void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
-
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
-
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+extern const struct file_operations uverbs_event_fops;
+extern const struct file_operations uverbs_async_event_fops;
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
+void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file);
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs);
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event);
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
int uverbs_dealloc_mw(struct ib_mw *mw);
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj);
+
+long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
struct ib_uverbs_flow_spec {
union {
@@ -242,68 +260,45 @@
};
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_esp esp;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
+ struct ib_uverbs_flow_spec_action_drop drop;
+ struct ib_uverbs_flow_spec_action_handle action;
+ struct ib_uverbs_flow_spec_action_count flow_count;
};
};
-#define IB_UVERBS_DECLARE_CMD(name) \
- ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
- struct ib_device *ib_dev, \
- const char __user *buf, int in_len, \
- int out_len)
-
-IB_UVERBS_DECLARE_CMD(get_context);
-IB_UVERBS_DECLARE_CMD(query_device);
-IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(alloc_pd);
-IB_UVERBS_DECLARE_CMD(dealloc_pd);
-IB_UVERBS_DECLARE_CMD(reg_mr);
-IB_UVERBS_DECLARE_CMD(rereg_mr);
-IB_UVERBS_DECLARE_CMD(dereg_mr);
-IB_UVERBS_DECLARE_CMD(alloc_mw);
-IB_UVERBS_DECLARE_CMD(dealloc_mw);
-IB_UVERBS_DECLARE_CMD(create_comp_channel);
-IB_UVERBS_DECLARE_CMD(create_cq);
-IB_UVERBS_DECLARE_CMD(resize_cq);
-IB_UVERBS_DECLARE_CMD(poll_cq);
-IB_UVERBS_DECLARE_CMD(req_notify_cq);
-IB_UVERBS_DECLARE_CMD(destroy_cq);
-IB_UVERBS_DECLARE_CMD(create_qp);
-IB_UVERBS_DECLARE_CMD(open_qp);
-IB_UVERBS_DECLARE_CMD(query_qp);
-IB_UVERBS_DECLARE_CMD(modify_qp);
-IB_UVERBS_DECLARE_CMD(destroy_qp);
-IB_UVERBS_DECLARE_CMD(post_send);
-IB_UVERBS_DECLARE_CMD(post_recv);
-IB_UVERBS_DECLARE_CMD(post_srq_recv);
-IB_UVERBS_DECLARE_CMD(create_ah);
-IB_UVERBS_DECLARE_CMD(destroy_ah);
-IB_UVERBS_DECLARE_CMD(attach_mcast);
-IB_UVERBS_DECLARE_CMD(detach_mcast);
-IB_UVERBS_DECLARE_CMD(create_srq);
-IB_UVERBS_DECLARE_CMD(modify_srq);
-IB_UVERBS_DECLARE_CMD(query_srq);
-IB_UVERBS_DECLARE_CMD(destroy_srq);
-IB_UVERBS_DECLARE_CMD(create_xsrq);
-IB_UVERBS_DECLARE_CMD(open_xrcd);
-IB_UVERBS_DECLARE_CMD(close_xrcd);
-
-#define IB_UVERBS_DECLARE_EX_CMD(name) \
- int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
- struct ib_device *ib_dev, \
- struct ib_udata *ucore, \
- struct ib_udata *uhw)
-
-IB_UVERBS_DECLARE_EX_CMD(create_flow);
-IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
-IB_UVERBS_DECLARE_EX_CMD(query_device);
-IB_UVERBS_DECLARE_EX_CMD(create_cq);
-IB_UVERBS_DECLARE_EX_CMD(create_qp);
-IB_UVERBS_DECLARE_EX_CMD(create_wq);
-IB_UVERBS_DECLARE_EX_CMD(modify_wq);
-IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
-IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec);
+
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static inline u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+ u32 res;
+
+ /* All IBA CapabilityMask bits are passed through here, except bit 26,
+ * which is overridden with IP_BASED_GIDS. This is due to a historical
+ * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+ * bits match the IBA definition across all kernel versions.
+ */
+ res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ if (attr->ip_gids)
+ res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ return res;
+}
+
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num);
#endif /* UVERBS_H */
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -70,7 +70,7 @@
ah->last_send = 0;
kref_init(&ah->ref);
- ah->ah = ib_create_ah(pd, attr);
+ ah->ah = ib_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE);
if (IS_ERR(ah->ah)) {
kfree(ah);
ah = NULL;
@@ -572,7 +572,7 @@
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
- ib_destroy_ah(ah->ah);
+ ib_destroy_ah(ah->ah, 0);
kfree(ah);
}
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
@@ -750,12 +750,8 @@
}
if (ssk->rx_ring.cq) {
- if (ib_destroy_cq(ssk->rx_ring.cq)) {
- sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
- ssk->rx_ring.cq);
- } else {
- ssk->rx_ring.cq = NULL;
- }
+ ib_destroy_cq(ssk->rx_ring.cq);
+ ssk->rx_ring.cq = NULL;
}
WARN_ON(ring_head(ssk->rx_ring) != ring_tail(ssk->rx_ring));
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
@@ -476,12 +476,8 @@
}
if (ssk->tx_ring.cq) {
- if (ib_destroy_cq(ssk->tx_ring.cq)) {
- sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
- ssk->tx_ring.cq);
- } else {
- ssk->tx_ring.cq = NULL;
- }
+ ib_destroy_cq(ssk->tx_ring.cq);
+ ssk->tx_ring.cq = NULL;
}
WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
diff --git a/sys/ofed/include/rdma/ib_verbs.h b/sys/ofed/include/rdma/ib_verbs.h
--- a/sys/ofed/include/rdma/ib_verbs.h
+++ b/sys/ofed/include/rdma/ib_verbs.h
@@ -60,18 +60,31 @@
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/netdevice.h>
+#include <linux/xarray.h>
#include <netinet/ip.h>
+#include <uapi/rdma/ib_user_verbs.h>
+#include <rdma/signature.h>
+#include <uapi/rdma/rdma_user_ioctl.h>
+#include <uapi/rdma/ib_user_ioctl_verbs.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
+struct ib_uqp_object;
+struct ib_usrq_object;
+struct ib_uwq_object;
struct ifla_vf_info;
struct ifla_vf_stats;
struct ib_uverbs_file;
+struct uverbs_attr_bundle;
+
+enum ib_uverbs_advise_mr_advice;
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
+struct ib_ucq_object;
+
union ib_gid {
u8 raw[16];
struct {
@@ -231,17 +244,6 @@
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
};
-enum ib_signature_prot_cap {
- IB_PROT_T10DIF_TYPE_1 = 1,
- IB_PROT_T10DIF_TYPE_2 = 1 << 1,
- IB_PROT_T10DIF_TYPE_3 = 1 << 2,
-};
-
-enum ib_signature_guard_cap {
- IB_GUARD_T10DIF_CRC = 1,
- IB_GUARD_T10DIF_CSUM = 1 << 1,
-};
-
enum ib_atomic_cap {
IB_ATOMIC_NONE,
IB_ATOMIC_HCA,
@@ -266,6 +268,7 @@
uint32_t rc_odp_caps;
uint32_t uc_odp_caps;
uint32_t ud_odp_caps;
+ uint32_t xrc_odp_caps;
} per_transport_caps;
};
@@ -279,6 +282,24 @@
u32 max_rwq_indirection_table_size;
};
+enum ib_tm_cap_flags {
+ /* Support tag matching with rendezvous offload for RC transport */
+ IB_TM_CAP_RNDV_RC = 1 << 0,
+};
+
+struct ib_tm_caps {
+ /* Max size of RNDV header */
+ u32 max_rndv_hdr_size;
+ /* Max number of entries in tag matching list */
+ u32 max_num_tags;
+ /* From enum ib_tm_cap_flags */
+ u32 flags;
+ /* Max number of outstanding list operations */
+ u32 max_ops;
+ /* Max number of SGE in tag matching entry */
+ u32 max_sge;
+};
+
enum ib_cq_creation_flags {
IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
@@ -290,6 +311,27 @@
u32 flags;
};
+enum ib_cq_attr_mask {
+ IB_CQ_MODERATE = 1 << 0,
+};
+
+struct ib_cq_caps {
+ u16 max_cq_moderation_count;
+ u16 max_cq_moderation_period;
+};
+
+struct ib_dm_mr_attr {
+ u64 length;
+ u64 offset;
+ u32 access_flags;
+};
+
+struct ib_dm_alloc_attr {
+ u64 length;
+ u32 alignment;
+ u32 flags;
+};
+
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
@@ -327,7 +369,11 @@
int max_map_per_fmr;
int max_srq;
int max_srq_wr;
- int max_srq_sge;
+ union {
+ int max_srq_sge;
+ int max_send_sge;
+ int max_recv_sge;
+ };
unsigned int max_fast_reg_page_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
@@ -338,6 +384,12 @@
uint64_t hca_core_clock; /* in KHZ */
struct ib_rss_caps rss_caps;
u32 max_wq_type_rq;
+ u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
+ struct ib_tm_caps tm_caps;
+ struct ib_cq_caps cq_caps;
+ u64 max_dm_size;
+ /* Max entries for sgl for optimized performance per READ */
+ u32 max_sgl_rd;
};
enum ib_mtu {
@@ -540,6 +592,8 @@
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
int gid_tbl_len;
+ unsigned int ip_gids:1;
+ /* This is the value from PortInfo CapabilityMask, defined by IBA */
u32 port_cap_flags;
u32 max_msg_sz;
u32 bad_pkey_cntr;
@@ -716,105 +770,26 @@
* enum ib_mr_type - memory region type
* @IB_MR_TYPE_MEM_REG: memory region that is used for
* normal registration
- * @IB_MR_TYPE_SIGNATURE: memory region that is used for
- * signature operations (data-integrity
- * capable regions)
* @IB_MR_TYPE_SG_GAPS: memory region that is capable to
* register any arbitrary sg lists (without
* the normal mr constraints - see
* ib_map_mr_sg)
+ * @IB_MR_TYPE_DM: memory region that is used for device
+ * memory registration
+ * @IB_MR_TYPE_USER: memory region that is used for the user-space
+ * application
+ * @IB_MR_TYPE_DMA: memory region that is used for DMA operations
+ * without address translations (VA=PA)
+ * @IB_MR_TYPE_INTEGRITY: memory region that is used for
+ * data integrity operations
*/
enum ib_mr_type {
IB_MR_TYPE_MEM_REG,
- IB_MR_TYPE_SIGNATURE,
IB_MR_TYPE_SG_GAPS,
-};
-
-/**
- * Signature types
- * IB_SIG_TYPE_NONE: Unprotected.
- * IB_SIG_TYPE_T10_DIF: Type T10-DIF
- */
-enum ib_signature_type {
- IB_SIG_TYPE_NONE,
- IB_SIG_TYPE_T10_DIF,
-};
-
-/**
- * Signature T10-DIF block-guard types
- * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
- * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
- */
-enum ib_t10_dif_bg_type {
- IB_T10DIF_CRC,
- IB_T10DIF_CSUM
-};
-
-/**
- * struct ib_t10_dif_domain - Parameters specific for T10-DIF
- * domain.
- * @bg_type: T10-DIF block guard type (CRC|CSUM)
- * @pi_interval: protection information interval.
- * @bg: seed of guard computation.
- * @app_tag: application tag of guard block
- * @ref_tag: initial guard block reference tag.
- * @ref_remap: Indicate wethear the reftag increments each block
- * @app_escape: Indicate to skip block check if apptag=0xffff
- * @ref_escape: Indicate to skip block check if reftag=0xffffffff
- * @apptag_check_mask: check bitmask of application tag.
- */
-struct ib_t10_dif_domain {
- enum ib_t10_dif_bg_type bg_type;
- u16 pi_interval;
- u16 bg;
- u16 app_tag;
- u32 ref_tag;
- bool ref_remap;
- bool app_escape;
- bool ref_escape;
- u16 apptag_check_mask;
-};
-
-/**
- * struct ib_sig_domain - Parameters for signature domain
- * @sig_type: specific signauture type
- * @sig: union of all signature domain attributes that may
- * be used to set domain layout.
- */
-struct ib_sig_domain {
- enum ib_signature_type sig_type;
- union {
- struct ib_t10_dif_domain dif;
- } sig;
-};
-
-/**
- * struct ib_sig_attrs - Parameters for signature handover operation
- * @check_mask: bitmask for signature byte check (8 bytes)
- * @mem: memory domain layout desciptor.
- * @wire: wire domain layout desciptor.
- */
-struct ib_sig_attrs {
- u8 check_mask;
- struct ib_sig_domain mem;
- struct ib_sig_domain wire;
-};
-
-enum ib_sig_err_type {
- IB_SIG_BAD_GUARD,
- IB_SIG_BAD_REFTAG,
- IB_SIG_BAD_APPTAG,
-};
-
-/**
- * struct ib_sig_err - signature error descriptor
- */
-struct ib_sig_err {
- enum ib_sig_err_type err_type;
- u32 expected;
- u32 actual;
- u64 sig_err_offset;
- u32 key;
+ IB_MR_TYPE_DM,
+ IB_MR_TYPE_USER,
+ IB_MR_TYPE_DMA,
+ IB_MR_TYPE_INTEGRITY,
};
enum ib_mr_status_check {
@@ -944,9 +919,16 @@
enum ib_srq_type {
IB_SRQT_BASIC,
- IB_SRQT_XRC
+ IB_SRQT_XRC,
+ IB_SRQT_TM,
};
+static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
+{
+ return srq_type == IB_SRQT_XRC ||
+ srq_type == IB_SRQT_TM;
+}
+
enum ib_srq_attr_mask {
IB_SRQ_MAX_WR = 1 << 0,
IB_SRQ_LIMIT = 1 << 1,
@@ -964,11 +946,17 @@
struct ib_srq_attr attr;
enum ib_srq_type srq_type;
- union {
- struct {
- struct ib_xrcd *xrcd;
- struct ib_cq *cq;
- } xrc;
+ struct {
+ struct ib_cq *cq;
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ } xrc;
+
+ struct {
+ u32 max_num_tags;
+ } tag_matching;
+ };
} ext;
};
@@ -1010,6 +998,7 @@
IB_QPT_XRC_INI = 9,
IB_QPT_XRC_TGT,
IB_QPT_MAX,
+ IB_QPT_DRIVER = 0xFF,
/* Reserve a range for qp types internal to the low level driver.
* These qp types will not be visible at the IB core layer, so the
* IB_QPT_MAX usages should not be affected in the core layer
@@ -1036,6 +1025,9 @@
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
IB_QP_CREATE_SCATTER_FCS = 1 << 8,
+ IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
+ IB_QP_CREATE_SOURCE_QPN = 1 << 10,
+ IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -1063,6 +1055,7 @@
*/
u8 port_num;
struct ib_rwq_ind_table *rwq_ind_tbl;
+ u32 source_qpn;
};
struct ib_qp_open_attr {
@@ -1336,13 +1329,19 @@
};
enum ib_access_flags {
- IB_ACCESS_LOCAL_WRITE = 1,
- IB_ACCESS_REMOTE_WRITE = (1<<1),
- IB_ACCESS_REMOTE_READ = (1<<2),
- IB_ACCESS_REMOTE_ATOMIC = (1<<3),
- IB_ACCESS_MW_BIND = (1<<4),
- IB_ZERO_BASED = (1<<5),
- IB_ACCESS_ON_DEMAND = (1<<6),
+ IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
+ IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
+ IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
+ IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
+ IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
+ IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
+ IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
+ IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+ IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
+
+ IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
+ IB_ACCESS_SUPPORTED =
+ ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
};
/*
@@ -1378,57 +1377,48 @@
RDMA_REMOVE_ABORT,
};
+struct ib_rdmacg_object {
+};
+
struct ib_ucontext {
struct ib_device *device;
- struct list_head pd_list;
- struct list_head mr_list;
- struct list_head mw_list;
- struct list_head cq_list;
- struct list_head qp_list;
- struct list_head srq_list;
- struct list_head ah_list;
- struct list_head xrcd_list;
- struct list_head rule_list;
- struct list_head wq_list;
- struct list_head rwq_ind_tbl_list;
- int closing;
+ struct ib_uverbs_file *ufile;
+ /*
+ * 'closing' can be read by the driver only during a destroy callback,
+ * it is set when we are closing the file descriptor and indicates
+ * that mm_sem may be locked.
+ */
+ bool closing;
bool cleanup_retryable;
- pid_t tgid;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct rb_root umem_tree;
+ struct ib_rdmacg_object cg_obj;
/*
- * Protects .umem_rbroot and tree, as well as odp_mrs_count and
- * mmu notifiers registration.
+ * Implementation details of the RDMA core, don't use in drivers:
*/
- struct rw_semaphore umem_rwsem;
- void (*invalidate_range)(struct ib_umem *umem,
- unsigned long start, unsigned long end);
-
- struct mmu_notifier mn;
- atomic_t notifier_count;
- /* A list of umems that don't have private mmu notifier counters yet. */
- struct list_head no_private_counters;
- int odp_mrs_count;
-#endif
+ struct xarray mmap_xa;
};
struct ib_uobject {
u64 user_handle; /* handle given to us by userspace */
+ /* ufile & ucontext owning this object */
+ struct ib_uverbs_file *ufile;
+ /* FIXME, save memory: ufile->context == context */
struct ib_ucontext *context; /* associated user context */
void *object; /* containing object */
struct list_head list; /* link to context's list */
+ struct ib_rdmacg_object cg_obj; /* rdmacg object */
int id; /* index into kernel idr */
struct kref ref;
- struct rw_semaphore mutex; /* protects .live */
+ atomic_t usecnt; /* protects exclusive access */
struct rcu_head rcu; /* kfree_rcu() overhead */
- int live;
+
+ const struct uverbs_api_object *uapi_object;
};
struct ib_udata {
- const void __user *inbuf;
- void __user *outbuf;
+ const u8 __user *inbuf;
+ u8 __user *outbuf;
size_t inlen;
size_t outlen;
};
@@ -1473,7 +1463,7 @@
struct ib_cq {
struct ib_device *device;
- struct ib_uobject *uobject;
+ struct ib_ucq_object *uobject;
ib_comp_handler comp_handler;
void (*event_handler)(struct ib_event *, void *);
void *cq_context;
@@ -1486,18 +1476,20 @@
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_uobject *uobject;
+ struct ib_usrq_object *uobject;
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
enum ib_srq_type srq_type;
atomic_t usecnt;
- union {
- struct {
- struct ib_xrcd *xrcd;
- struct ib_cq *cq;
- u32 srq_num;
- } xrc;
+ struct {
+ struct ib_cq *cq;
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ u32 srq_num;
+ } xrc;
+ };
} ext;
};
@@ -1513,7 +1505,7 @@
struct ib_wq {
struct ib_device *device;
- struct ib_uobject *uobject;
+ struct ib_uwq_object *uobject;
void *wq_context;
void (*event_handler)(struct ib_event *, void *);
struct ib_pd *pd;
@@ -1524,6 +1516,13 @@
atomic_t usecnt;
};
+enum ib_wq_flags {
+ IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
+ IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
+ IB_WQ_FLAGS_DELAY_DROP = 1 << 2,
+ IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3,
+};
+
struct ib_wq_init_attr {
void *wq_context;
enum ib_wq_type wq_type;
@@ -1531,16 +1530,20 @@
u32 max_sge;
struct ib_cq *cq;
void (*event_handler)(struct ib_event *, void *);
+ u32 create_flags; /* Use enum ib_wq_flags */
};
enum ib_wq_attr_mask {
- IB_WQ_STATE = 1 << 0,
- IB_WQ_CUR_STATE = 1 << 1,
+ IB_WQ_STATE = 1 << 0,
+ IB_WQ_CUR_STATE = 1 << 1,
+ IB_WQ_FLAGS = 1 << 2,
};
struct ib_wq_attr {
enum ib_wq_state wq_state;
enum ib_wq_state curr_wq_state;
+ u32 flags; /* Use enum ib_wq_flags */
+ u32 flags_mask; /* Use enum ib_wq_flags */
};
struct ib_rwq_ind_table {
@@ -1576,7 +1579,7 @@
atomic_t usecnt;
struct list_head open_list;
struct ib_qp *real_qp;
- struct ib_uobject *uobject;
+ struct ib_uqp_object *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
@@ -1584,6 +1587,15 @@
u32 max_read_sge;
enum ib_qp_type qp_type;
struct ib_rwq_ind_table *rwq_ind_tbl;
+ u8 port;
+};
+
+struct ib_dm {
+ struct ib_device *device;
+ u32 length;
+ u32 flags;
+ struct ib_uobject *uobject;
+ atomic_t usecnt;
};
struct ib_mr {
@@ -1594,11 +1606,15 @@
u64 iova;
u64 length;
unsigned int page_size;
+ enum ib_mr_type type;
bool need_inval;
union {
struct ib_uobject *uobject; /* user */
struct list_head qp_entry; /* FR */
};
+
+ struct ib_dm *dm;
+ struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
};
struct ib_mw {
@@ -1636,17 +1652,27 @@
/* Supported steering header types */
enum ib_flow_spec_type {
/* L2 headers*/
- IB_FLOW_SPEC_ETH = 0x20,
- IB_FLOW_SPEC_IB = 0x22,
+ IB_FLOW_SPEC_ETH = 0x20,
+ IB_FLOW_SPEC_IB = 0x22,
/* L3 header*/
- IB_FLOW_SPEC_IPV4 = 0x30,
- IB_FLOW_SPEC_IPV6 = 0x31,
+ IB_FLOW_SPEC_IPV4 = 0x30,
+ IB_FLOW_SPEC_IPV6 = 0x31,
+ IB_FLOW_SPEC_ESP = 0x34,
/* L4 headers*/
- IB_FLOW_SPEC_TCP = 0x40,
- IB_FLOW_SPEC_UDP = 0x41
+ IB_FLOW_SPEC_TCP = 0x40,
+ IB_FLOW_SPEC_UDP = 0x41,
+ IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50,
+ IB_FLOW_SPEC_GRE = 0x51,
+ IB_FLOW_SPEC_MPLS = 0x60,
+ IB_FLOW_SPEC_INNER = 0x100,
+ /* Actions */
+ IB_FLOW_SPEC_ACTION_TAG = 0x1000,
+ IB_FLOW_SPEC_ACTION_DROP = 0x1001,
+ IB_FLOW_SPEC_ACTION_HANDLE = 0x1002,
+ IB_FLOW_SPEC_ACTION_COUNT = 0x1003,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
-#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
/* Flow steering rule priority is set according to it's domain.
* Lower domain value means higher priority.
@@ -1751,9 +1777,94 @@
struct ib_flow_tcp_udp_filter mask;
};
+struct ib_flow_tunnel_filter {
+ __be32 tunnel_id;
+ u8 real_sz[0];
+};
+
+/* ib_flow_spec_tunnel describes the Vxlan tunnel
+ * the tunnel_id from val has the vni value
+ */
+struct ib_flow_spec_tunnel {
+ u32 type;
+ u16 size;
+ struct ib_flow_tunnel_filter val;
+ struct ib_flow_tunnel_filter mask;
+};
+
+struct ib_flow_esp_filter {
+ __be32 spi;
+ __be32 seq;
+ /* Must be last */
+ u8 real_sz[0];
+};
+
+struct ib_flow_spec_esp {
+ u32 type;
+ u16 size;
+ struct ib_flow_esp_filter val;
+ struct ib_flow_esp_filter mask;
+};
+
+struct ib_flow_gre_filter {
+ __be16 c_ks_res0_ver;
+ __be16 protocol;
+ __be32 key;
+ /* Must be last */
+ u8 real_sz[0];
+};
+
+struct ib_flow_spec_gre {
+ u32 type;
+ u16 size;
+ struct ib_flow_gre_filter val;
+ struct ib_flow_gre_filter mask;
+};
+
+struct ib_flow_mpls_filter {
+ __be32 tag;
+ /* Must be last */
+ u8 real_sz[0];
+};
+
+struct ib_flow_spec_mpls {
+ u32 type;
+ u16 size;
+ struct ib_flow_mpls_filter val;
+ struct ib_flow_mpls_filter mask;
+};
+
+struct ib_flow_spec_action_tag {
+ enum ib_flow_spec_type type;
+ u16 size;
+ u32 tag_id;
+};
+
+struct ib_flow_spec_action_drop {
+ enum ib_flow_spec_type type;
+ u16 size;
+};
+
+struct ib_flow_spec_action_handle {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_action *act;
+};
+
+enum ib_counters_description {
+ IB_COUNTER_PACKETS,
+ IB_COUNTER_BYTES,
+};
+
+struct ib_flow_spec_action_count {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_counters *counters;
+};
+
union ib_flow_spec {
struct {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
};
struct ib_flow_spec_eth eth;
@@ -1761,6 +1872,14 @@
struct ib_flow_spec_ipv4 ipv4;
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
+ struct ib_flow_spec_tunnel tunnel;
+ struct ib_flow_spec_esp esp;
+ struct ib_flow_spec_gre gre;
+ struct ib_flow_spec_mpls mpls;
+ struct ib_flow_spec_action_tag flow_tag;
+ struct ib_flow_spec_action_drop drop;
+ struct ib_flow_spec_action_handle action;
+ struct ib_flow_spec_action_count flow_count;
};
struct ib_flow_attr {
@@ -1770,17 +1889,74 @@
u32 flags;
u8 num_of_specs;
u8 port;
- /* Following are the optional layers according to user request
- * struct ib_flow_spec_xxx
- * struct ib_flow_spec_yyy
- */
+ union ib_flow_spec flows[0];
};
struct ib_flow {
struct ib_qp *qp;
+ struct ib_device *device;
struct ib_uobject *uobject;
};
+enum ib_flow_action_type {
+ IB_FLOW_ACTION_UNSPECIFIED,
+ IB_FLOW_ACTION_ESP = 1,
+};
+
+struct ib_flow_action_attrs_esp_keymats {
+ enum ib_uverbs_flow_action_esp_keymat protocol;
+ union {
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm;
+ } keymat;
+};
+
+struct ib_flow_action_attrs_esp_replays {
+ enum ib_uverbs_flow_action_esp_replay protocol;
+ union {
+ struct ib_uverbs_flow_action_esp_replay_bmp bmp;
+ } replay;
+};
+
+enum ib_flow_action_attrs_esp_flags {
+ /* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags
+ * This is done in order to share the same flags between user-space and
+ * kernel and spare an unnecessary translation.
+ */
+
+ /* Kernel flags */
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED = 1ULL << 32,
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS = 1ULL << 33,
+};
+
+struct ib_flow_spec_list {
+ struct ib_flow_spec_list *next;
+ union ib_flow_spec spec;
+};
+
+struct ib_flow_action_attrs_esp {
+ struct ib_flow_action_attrs_esp_keymats *keymat;
+ struct ib_flow_action_attrs_esp_replays *replay;
+ struct ib_flow_spec_list *encap;
+ /* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled.
+ * Value of 0 is a valid value.
+ */
+ u32 esn;
+ u32 spi;
+ u32 seq;
+ u32 tfc_pad;
+ /* Use enum ib_flow_action_attrs_esp_flags */
+ u64 flags;
+ u64 hard_limit_pkts;
+};
+
+struct ib_flow_action {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ enum ib_flow_action_type type;
+ atomic_t usecnt;
+};
+
+
struct ib_mad_hdr;
struct ib_grh;
@@ -1863,8 +2039,71 @@
u32 max_mad_size;
};
+struct ib_counters {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ /* num of objects attached */
+ atomic_t usecnt;
+};
+
+struct ib_counters_read_attr {
+ u64 *counters_buff;
+ u32 ncounters;
+ u32 flags; /* use enum ib_read_counters_flags */
+};
+
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \
+ .size_##ib_struct = \
+ (sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO( \
+ !__same_type(((struct drv_struct *)NULL)->member, \
+ struct ib_struct)))
+
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
+ ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
+
+#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
+ rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
+
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
+
+struct rdma_user_mmap_entry {
+ struct kref ref;
+ struct ib_ucontext *ucontext;
+ unsigned long start_pgoff;
+ size_t npages;
+ bool driver_removed;
+};
+
+/* Return the offset (in bytes) the user should pass to libc's mmap() */
+static inline u64
+rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry)
+{
+ return (u64)entry->start_pgoff << PAGE_SHIFT;
+}
+
+struct ib_device_ops {
+ enum rdma_driver_id driver_id;
+ DECLARE_RDMA_OBJ_SIZE(ib_ah);
+ DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_srq);
+ DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
+};
+
+#define INIT_IB_DEVICE_OPS(pop, driver, DRIVER) do { \
+ (pop)[0] .driver_id = RDMA_DRIVER_##DRIVER; \
+ (pop)[0] INIT_RDMA_OBJ_SIZE(ib_ah, driver##_ib_ah, ibah); \
+ (pop)[0] INIT_RDMA_OBJ_SIZE(ib_cq, driver##_ib_cq, ibcq); \
+ (pop)[0] INIT_RDMA_OBJ_SIZE(ib_pd, driver##_ib_pd, ibpd); \
+ (pop)[0] INIT_RDMA_OBJ_SIZE(ib_srq, driver##_ib_srq, ibsrq); \
+ (pop)[0] INIT_RDMA_OBJ_SIZE(ib_ucontext, driver##_ib_ucontext, ibucontext); \
+} while (0)
+
struct ib_device {
struct device *dma_device;
+ struct ib_device_ops ops;
char name[IB_DEVICE_NAME_MAX];
@@ -1967,24 +2206,22 @@
int (*modify_port)(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
- struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
+ int (*alloc_ucontext)(struct ib_ucontext *uctx,
struct ib_udata *udata);
- int (*dealloc_ucontext)(struct ib_ucontext *context);
+ void (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context,
struct vm_area_struct *vma);
- struct ib_pd * (*alloc_pd)(struct ib_device *device,
- struct ib_ucontext *context,
+ int (*alloc_pd)(struct ib_pd *pd,
struct ib_udata *udata);
- int (*dealloc_pd)(struct ib_pd *pd);
- struct ib_ah * (*create_ah)(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
- struct ib_udata *udata);
+ void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*create_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr,
+ u32 flags, struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah,
struct ib_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah,
struct ib_ah_attr *ah_attr);
- int (*destroy_ah)(struct ib_ah *ah);
- struct ib_srq * (*create_srq)(struct ib_pd *pd,
+ void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*create_srq)(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
int (*modify_srq)(struct ib_srq *srq,
@@ -1993,7 +2230,7 @@
struct ib_udata *udata);
int (*query_srq)(struct ib_srq *srq,
struct ib_srq_attr *srq_attr);
- int (*destroy_srq)(struct ib_srq *srq);
+ void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
int (*post_srq_recv)(struct ib_srq *srq,
const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
@@ -2008,20 +2245,19 @@
struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
- int (*destroy_qp)(struct ib_qp *qp);
+ int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
int (*post_send)(struct ib_qp *qp,
const struct ib_send_wr *send_wr,
const struct ib_send_wr **bad_send_wr);
int (*post_recv)(struct ib_qp *qp,
const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device,
+ int (*create_cq)(struct ib_cq *,
const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
u16 cq_period);
- int (*destroy_cq)(struct ib_cq *cq);
+ void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe,
struct ib_udata *udata);
int (*poll_cq)(struct ib_cq *cq, int num_entries,
@@ -2045,10 +2281,13 @@
int mr_access_flags,
struct ib_pd *pd,
struct ib_udata *udata);
- int (*dereg_mr)(struct ib_mr *mr);
- struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
+ int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
+ struct ib_mr * (*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata);
+ int (*advise_mr)(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
+ const struct ib_sge *sg_list, u32 num_sge,
+ struct uverbs_attr_bundle *attrs);
int (*map_mr_sg)(struct ib_mr *mr,
struct scatterlist *sg,
int sg_nents,
@@ -2082,16 +2321,31 @@
size_t *out_mad_size,
u16 *out_mad_pkey_index);
struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
- struct ib_ucontext *ucontext,
struct ib_udata *udata);
- int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
struct ib_flow * (*create_flow)(struct ib_qp *qp,
struct ib_flow_attr
*flow_attr,
- int domain);
+ int domain, struct ib_udata *udata);
int (*destroy_flow)(struct ib_flow *flow_id);
+ struct ib_flow_action *(*create_flow_action_esp)(
+ struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_flow_action)(struct ib_flow_action *action);
+ int (*modify_flow_action_esp)(
+ struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+ /**
+ * This will be called once refcount of an entry in mmap_xa reaches
+ * zero. The type of the memory that was mapped may differ between
+ * entries and is opaque to the rdma_user_mmap interface.
+ * Therefore needs to be implemented by the driver in mmap_free.
+ */
+ void (*mmap_free)(struct rdma_user_mmap_entry *entry);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
void (*drain_rq)(struct ib_qp *qp);
void (*drain_sq)(struct ib_qp *qp);
@@ -2106,7 +2360,7 @@
struct ib_wq * (*create_wq)(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
- int (*destroy_wq)(struct ib_wq *wq);
+ void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
int (*modify_wq)(struct ib_wq *wq,
struct ib_wq_attr *attr,
u32 wq_attr_mask,
@@ -2115,6 +2369,20 @@
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+ struct ib_dm *(*alloc_dm)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
+ struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ struct ib_counters *(*create_counters)(
+ struct ib_device *device, struct uverbs_attr_bundle *attrs);
+ int (*destroy_counters)(struct ib_counters *counters);
+ int (*read_counters)(struct ib_counters *counters,
+ struct ib_counters_read_attr *counters_read_attr,
+ struct uverbs_attr_bundle *attrs);
struct ib_dma_mapping_ops *dma_ops;
struct module *owner;
@@ -2142,6 +2410,8 @@
struct attribute_group *hw_stats_ag;
struct rdma_hw_stats *hw_stats;
+ const struct uapi_definition *driver_def;
+
/**
* The following mandatory functions are used only at device
* registration. Keep functions such as these at the end of this
@@ -2199,6 +2469,26 @@
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot,
+ struct rdma_user_mmap_entry *entry);
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length);
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff);
+
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+ unsigned long pgoff);
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma);
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry);
+
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry);
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
@@ -2209,11 +2499,9 @@
return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
}
-static inline bool ib_is_udata_cleared(struct ib_udata *udata,
- size_t offset,
- size_t len)
+static inline bool ib_is_buffer_cleared(const void __user *p,
+ size_t len)
{
- const void __user *p = (const char __user *)udata->inbuf + offset;
bool ret;
u8 *buf;
@@ -2229,6 +2517,13 @@
return ret;
}
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+ size_t offset,
+ size_t len)
+{
+ return ib_is_buffer_cleared(udata->inbuf + offset, len);
+}
+
/**
* ib_is_destroy_retryable - Check whether the uobject destruction
* is retryable.
@@ -2658,17 +2953,57 @@
const char *caller);
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), __func__)
-void ib_dealloc_pd(struct ib_pd *pd);
+
+/**
+ * ib_dealloc_pd_user - Deallocate kernel/user PD
+ * @pd: The protection domain
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+
+/**
+ * ib_dealloc_pd - Deallocate kernel PD
+ * @pd: The protection domain
+ *
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
+ */
+static inline void ib_dealloc_pd(struct ib_pd *pd)
+{
+ ib_dealloc_pd_user(pd, NULL);
+}
+
+enum rdma_create_ah_flags {
+ /* In a sleepable context */
+ RDMA_CREATE_AH_SLEEPABLE = BIT(0),
+};
/**
* ib_create_ah - Creates an address handle for the given address vector.
* @pd: The protection domain associated with the address handle.
* @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
+ *
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ u32 flags);
+
+/**
+ * ib_create_user_ah - Creates an address handle for the given address vector.
+ * It resolves destination mac address for ah attribute of RoCE type.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @udata: pointer to user's input output buffer information need by
+ * provider driver.
*
+ * It returns 0 on success and returns appropriate error code on error.
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *ib_create_user_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
/**
* ib_init_ah_from_wc - Initializes address handle attributes from a
@@ -2718,11 +3053,30 @@
*/
int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+enum rdma_destroy_ah_flags {
+ /* In a sleepable context */
+ RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
+};
+
/**
- * ib_destroy_ah - Destroys an address handle.
+ * ib_destroy_ah_user - Destroys an address handle.
* @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_ah(struct ib_ah *ah);
+int ib_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
+
+/**
+ * rdma_destroy_ah - Destroys an kernel address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ *
+ * NOTE: for user ah use ib_destroy_ah_user with valid udata!
+ */
+static inline int ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return ib_destroy_ah_user(ah, flags, NULL);
+}
/**
* ib_create_srq - Creates a SRQ associated with the specified protection
@@ -2766,10 +3120,22 @@
struct ib_srq_attr *srq_attr);
/**
- * ib_destroy_srq - Destroys the specified SRQ.
+ * ib_destroy_srq_user - Destroys the specified SRQ.
* @srq: The SRQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_srq(struct ib_srq *srq);
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
+ * @srq: The SRQ to destroy.
+ *
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
+ */
+static inline int ib_destroy_srq(struct ib_srq *srq)
+{
+ return ib_destroy_srq_user(srq, NULL);
+}
/**
* ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
@@ -2796,6 +3162,22 @@
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr);
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify. On output,
+ * the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ * are being modified.
+ * @udata: pointer to user's input output buffer information
+ * are being modified.
+ * It returns 0 on success and returns appropriate error code on error.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *qp,
+ struct ib_qp_attr *attr,
+ int attr_mask,
+ struct ib_udata *udata);
+
/**
* ib_modify_qp - Modifies the attributes for the specified QP and then
* transitions the QP to the given state.
@@ -2828,8 +3210,20 @@
/**
* ib_destroy_qp - Destroys the specified QP.
* @qp: The QP to destroy.
+ * @udata: Valid udata or NULL for kernel objects
+ */
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
+
+/**
+ * ib_destroy_qp - Destroys the specified kernel QP.
+ * @qp: The QP to destroy.
+ *
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
*/
-int ib_destroy_qp(struct ib_qp *qp);
+static inline int ib_destroy_qp(struct ib_qp *qp)
+{
+ return ib_destroy_qp_user(qp, NULL);
+}
/**
* ib_open_qp - Obtain a reference to an existing sharable QP.
@@ -2885,9 +3279,65 @@
return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
}
-struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
-void ib_free_cq(struct ib_cq *cq);
+struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx,
+ const char *caller, struct ib_udata *udata);
+
+/**
+ * ib_alloc_cq_user: Allocate kernel/user CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ * @udata: Valid user data or NULL for kernel objects
+ */
+static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
+ void *private, int nr_cqe,
+ int comp_vector,
+ enum ib_poll_context poll_ctx,
+ struct ib_udata *udata)
+{
+ return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ "ibcore", udata);
+}
+
+/**
+ * ib_alloc_cq: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ *
+ * NOTE: for user cq use ib_alloc_cq_user with valid udata!
+ */
+static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx)
+{
+ return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ NULL);
+}
+
+/**
+ * ib_free_cq_user - Free kernel/user CQ
+ * @cq: The CQ to free
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_free_cq - Free kernel CQ
+ * @cq: The CQ to free
+ *
+ * NOTE: for user cq use ib_free_cq_user with valid udata!
+ */
+static inline void ib_free_cq(struct ib_cq *cq)
+{
+ ib_free_cq_user(cq, NULL);
+}
/**
* ib_create_cq - Creates a CQ on the specified device.
@@ -2902,11 +3352,14 @@
*
* Users can examine the cq structure to determine the actual CQ size.
*/
-struct ib_cq *ib_create_cq(struct ib_device *device,
- ib_comp_handler comp_handler,
- void (*event_handler)(struct ib_event *, void *),
- void *cq_context,
- const struct ib_cq_init_attr *cq_attr);
+struct ib_cq *__ib_create_cq(struct ib_device *device,
+ ib_comp_handler comp_handler,
+ void (*event_handler)(struct ib_event *, void *),
+ void *cq_context,
+ const struct ib_cq_init_attr *cq_attr,
+ const char *caller);
+#define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \
+ __ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), "ibcore")
/**
* ib_resize_cq - Modifies the capacity of the CQ.
@@ -2927,10 +3380,22 @@
int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
/**
- * ib_destroy_cq - Destroys the specified CQ.
+ * ib_destroy_cq_user - Destroys the specified CQ.
* @cq: The CQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_cq(struct ib_cq *cq);
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_cq - Destroys the specified kernel CQ.
+ * @cq: The CQ to destroy.
+ *
+ * NOTE: for user cq use ib_destroy_cq_user with valid udata!
+ */
+static inline void ib_destroy_cq(struct ib_cq *cq)
+{
+ ib_destroy_cq_user(cq, NULL);
+}
/**
* ib_poll_cq - poll a CQ for completion(s)
@@ -3278,11 +3743,34 @@
*
* This function can fail, if the memory region has memory windows bound to it.
*/
-int ib_dereg_mr(struct ib_mr *mr);
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
+/**
+ * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
+ * HCA translation table.
+ * @mr: The memory region to deregister.
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ *
+ * NOTE: for user mr use ib_dereg_mr_user with valid udata!
+ */
+static inline int ib_dereg_mr(struct ib_mr *mr)
+{
+ return ib_dereg_mr_user(mr, NULL);
+}
+
+struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata);
+
+static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg)
+{
+ return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL);
+}
+
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg);
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
@@ -3371,18 +3859,18 @@
/**
* ib_alloc_xrcd - Allocates an XRC domain.
* @device: The device on which to allocate the XRC domain.
+ * @caller: Module name for kernel consumers
*/
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
+#define ib_alloc_xrcd(device) \
+ __ib_alloc_xrcd((device), "ibcore")
/**
* ib_dealloc_xrcd - Deallocates an XRC domain.
* @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
*/
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
-
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr, int domain);
-int ib_destroy_flow(struct ib_flow *flow_id);
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
static inline int ib_check_mr_access(int flags)
{
@@ -3394,9 +3882,26 @@
!(flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL;
+ if (flags & ~IB_ACCESS_SUPPORTED)
+ return -EINVAL;
+
return 0;
}
+static inline bool ib_access_writable(int access_flags)
+{
+ /*
+ * We have writable memory backing the MR if any of the following
+ * access flags are set. "Local write" and "remote write" obviously
+ * require write access. "Remote atomic" can do things like fetch and
+ * add, which will modify memory, and "MW bind" can change permissions
+ * by binding a window.
+ */
+ return access_flags &
+ (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
+}
+
/**
* ib_check_mr_status: lightweight check of MR status.
* This routine may provide status checks on a selected
@@ -3417,7 +3922,7 @@
const struct sockaddr *addr);
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq);
+int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask);
struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
@@ -3449,6 +3954,8 @@
struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
+
int ib_resolve_eth_dmac(struct ib_device *device,
struct ib_ah_attr *ah_attr);
#endif /* IB_VERBS_H */
diff --git a/sys/ofed/include/rdma/signature.h b/sys/ofed/include/rdma/signature.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/rdma/signature.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_SIGNATURE_H_
+#define _RDMA_SIGNATURE_H_
+
+#include <linux/types.h>
+
+enum ib_signature_prot_cap {
+ IB_PROT_T10DIF_TYPE_1 = 1,
+ IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+ IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+ IB_GUARD_T10DIF_CRC = 1,
+ IB_GUARD_T10DIF_CSUM = 1 << 1,
+};
+
+/**
+ * enum ib_signature_type - Signature types
+ * @IB_SIG_TYPE_NONE: Unprotected.
+ * @IB_SIG_TYPE_T10_DIF: Type T10-DIF
+ */
+enum ib_signature_type {
+ IB_SIG_TYPE_NONE,
+ IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
+ * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+ IB_T10DIF_CRC,
+ IB_T10DIF_CSUM,
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ * domain.
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @ref_remap: Indicate wethear the reftag increments each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
+ */
+struct ib_t10_dif_domain {
+ enum ib_t10_dif_bg_type bg_type;
+ u16 pi_interval;
+ u16 bg;
+ u16 app_tag;
+ u32 ref_tag;
+ bool ref_remap;
+ bool app_escape;
+ bool ref_escape;
+ u16 apptag_check_mask;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signauture type
+ * @sig: union of all signature domain attributes that may
+ * be used to set domain layout.
+ */
+struct ib_sig_domain {
+ enum ib_signature_type sig_type;
+ union {
+ struct ib_t10_dif_domain dif;
+ } sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ * @meta_length: metadata length
+ */
+struct ib_sig_attrs {
+ u8 check_mask;
+ struct ib_sig_domain mem;
+ struct ib_sig_domain wire;
+ int meta_length;
+};
+
+enum ib_sig_err_type {
+ IB_SIG_BAD_GUARD,
+ IB_SIG_BAD_REFTAG,
+ IB_SIG_BAD_APPTAG,
+};
+
+/*
+ * Signature check masks (8 bytes in total) according to the T10-PI standard:
+ * -------- -------- ------------
+ * | GUARD | APPTAG | REFTAG |
+ * | 2B | 2B | 4B |
+ * -------- -------- ------------
+ */
+enum {
+ IB_SIG_CHECK_GUARD = 0xc0,
+ IB_SIG_CHECK_APPTAG = 0x30,
+ IB_SIG_CHECK_REFTAG = 0x0f,
+};
+
+/*
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+ enum ib_sig_err_type err_type;
+ u32 expected;
+ u32 actual;
+ u64 sig_err_offset;
+ u32 key;
+};
+
+#endif /* _RDMA_SIGNATURE_H_ */
diff --git a/sys/ofed/include/rdma/uverbs_ioctl.h b/sys/ofed/include/rdma/uverbs_ioctl.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/rdma/uverbs_ioctl.h
@@ -0,0 +1,958 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UVERBS_IOCTL_
+#define _UVERBS_IOCTL_
+
+#include <rdma/uverbs_types.h>
+#include <linux/uaccess.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <rdma/ib_user_ioctl_verbs.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+
+/*
+ * =======================================
+ * Verbs action specifications
+ * =======================================
+ */
+
+enum uverbs_attr_type {
+ UVERBS_ATTR_TYPE_NA,
+ UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_TYPE_PTR_OUT,
+ UVERBS_ATTR_TYPE_IDR,
+ UVERBS_ATTR_TYPE_FD,
+ UVERBS_ATTR_TYPE_ENUM_IN,
+ UVERBS_ATTR_TYPE_IDRS_ARRAY,
+};
+
+enum uverbs_obj_access {
+ UVERBS_ACCESS_READ,
+ UVERBS_ACCESS_WRITE,
+ UVERBS_ACCESS_NEW,
+ UVERBS_ACCESS_DESTROY
+};
+
+/* Specification of a single attribute inside the ioctl message */
+/* good size 16 */
+struct uverbs_attr_spec {
+ u8 type;
+
+ /*
+ * Support extending attributes by length. Allow the user to provide
+ * more bytes than ptr.len, but check that everything after is zero'd
+ * by the user.
+ */
+ u8 zero_trailing:1;
+ /*
+ * Valid only for PTR_IN. Allocate and copy the data inside
+ * the parser
+ */
+ u8 alloc_and_copy:1;
+ u8 mandatory:1;
+ /* True if this is from UVERBS_ATTR_UHW */
+ u8 is_udata:1;
+
+ union {
+ struct {
+ /* Current known size to kernel */
+ u16 len;
+ /* User isn't allowed to provide something < min_len */
+ u16 min_len;
+ } ptr;
+
+ struct {
+ /*
+ * higher bits mean the namespace and lower bits mean
+ * the type id within the namespace.
+ */
+ u16 obj_type;
+ u8 access;
+ } obj;
+
+ struct {
+ u8 num_elems;
+ } enum_def;
+ } u;
+
+ /* This weird split lets us remove some padding */
+ union {
+ struct {
+ /*
+ * The enum attribute can select one of the attributes
+ * contained in the ids array. Currently only PTR_IN
+ * attributes are supported in the ids array.
+ */
+ const struct uverbs_attr_spec *ids;
+ } enum_def;
+
+ struct {
+ /*
+ * higher bits mean the namespace and lower bits mean
+ * the type id within the namespace.
+ */
+ u16 obj_type;
+ u16 min_len;
+ u16 max_len;
+ u8 access;
+ } objs_arr;
+ } u2;
+};
+
+/*
+ * Information about the API is loaded into a radix tree. For IOCTL we start
+ * with a tuple of:
+ * object_id, attr_id, method_id
+ *
+ * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based
+ * on the current kernel support this is compressed into 16 bit key for the
+ * radix tree. Since this compression is entirely internal to the kernel the
+ * below limits can be revised if the kernel gains additional data.
+ *
+ * With 64 leafs per node this is a 3 level radix tree.
+ *
+ * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
+ * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ *
+ * This also encodes the tables for the write() and write() extended commands
+ * using the coding
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command #
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex #
+ * ie the WRITE path is treated as a special method type in the ioctl
+ * framework.
+ */
+enum uapi_radix_data {
+ UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
+
+ UVERBS_API_ATTR_KEY_BITS = 6,
+ UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
+ UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+ UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS,
+
+ UVERBS_API_METHOD_KEY_BITS = 5,
+ UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
+ UVERBS_API_METHOD_KEY_NUM_CORE = 22,
+ UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_KEY_NUM_DRIVER =
+ (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) -
+ UVERBS_API_METHOD_KEY_NUM_CORE,
+ UVERBS_API_METHOD_KEY_MASK = GENMASK(
+ UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
+ UVERBS_API_METHOD_KEY_SHIFT),
+
+ UVERBS_API_OBJ_KEY_BITS = 5,
+ UVERBS_API_OBJ_KEY_SHIFT =
+ UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_OBJ_KEY_NUM_CORE = 20,
+ UVERBS_API_OBJ_KEY_NUM_DRIVER =
+ (1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE,
+ UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT),
+
+ /* This id guaranteed to not exist in the radix tree */
+ UVERBS_API_KEY_ERR = 0xFFFFFFFF,
+};
+
+static inline __attribute_const__ u32 uapi_key_obj(u32 id)
+{
+ if (id & UVERBS_API_NS_FLAG) {
+ id &= ~UVERBS_API_NS_FLAG;
+ if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER)
+ return UVERBS_API_KEY_ERR;
+ id = id + UVERBS_API_OBJ_KEY_NUM_CORE;
+ } else {
+ if (id >= UVERBS_API_OBJ_KEY_NUM_CORE)
+ return UVERBS_API_KEY_ERR;
+ }
+
+ return id << UVERBS_API_OBJ_KEY_SHIFT;
+}
+
+static inline __attribute_const__ bool uapi_key_is_object(u32 key)
+{
+ return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
+{
+ if (id & UVERBS_API_NS_FLAG) {
+ id &= ~UVERBS_API_NS_FLAG;
+ if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER)
+ return UVERBS_API_KEY_ERR;
+ id = id + UVERBS_API_METHOD_KEY_NUM_CORE;
+ } else {
+ id++;
+ if (id >= UVERBS_API_METHOD_KEY_NUM_CORE)
+ return UVERBS_API_KEY_ERR;
+ }
+
+ return id << UVERBS_API_METHOD_KEY_SHIFT;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE | id;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE_EX | id;
+}
+
+static inline __attribute_const__ u32
+uapi_key_attr_to_ioctl_method(u32 attr_key)
+{
+ return attr_key &
+ (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
+}
+
+static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
+{
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
+ (key & UVERBS_API_ATTR_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) ==
+ UVERBS_API_METHOD_IS_WRITE_EX;
+}
+
+static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
+{
+ /* 0 is the method slot itself */
+ return ioctl_method_key + 1;
+}
+
+static inline __attribute_const__ u32 uapi_key_attr(u32 id)
+{
+ /*
+ * The attr is designed to fit in the typical single radix tree node
+ * of 64 entries. Since allmost all methods have driver attributes we
+ * organize things so that the driver and core attributes interleave to
+ * reduce the length of the attributes array in typical cases.
+ */
+ if (id & UVERBS_API_NS_FLAG) {
+ id &= ~UVERBS_API_NS_FLAG;
+ id++;
+ if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+ return UVERBS_API_KEY_ERR;
+ id = (id << 1) | 0;
+ } else {
+ if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+ return UVERBS_API_KEY_ERR;
+ id = (id << 1) | 1;
+ }
+
+ return id;
+}
+
+/* Only true for ioctl methods */
+static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
+{
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
+ (key & UVERBS_API_ATTR_KEY_MASK) != 0;
+}
+
+/*
+ * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN),
+ * basically it undoes the reservation of 0 in the ID numbering. attr_key
+ * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of
+ * uapi_key_attr().
+ */
+static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
+{
+ return attr_key - 1;
+}
+
+static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
+{
+ return attr_bkey + 1;
+}
+
+/*
+ * =======================================
+ * Verbs definitions
+ * =======================================
+ */
+
+struct uverbs_attr_def {
+ u16 id;
+ struct uverbs_attr_spec attr;
+};
+
+struct uverbs_method_def {
+ u16 id;
+ /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */
+ u32 flags;
+ size_t num_attrs;
+ const struct uverbs_attr_def * const (*attrs)[];
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+};
+
+struct uverbs_object_def {
+ u16 id;
+ const struct uverbs_obj_type *type_attrs;
+ size_t num_methods;
+ const struct uverbs_method_def * const (*methods)[];
+};
+
+enum uapi_definition_kind {
+ UAPI_DEF_END = 0,
+ UAPI_DEF_OBJECT_START,
+ UAPI_DEF_WRITE,
+ UAPI_DEF_CHAIN_OBJ_TREE,
+ UAPI_DEF_CHAIN,
+ UAPI_DEF_IS_SUPPORTED_FUNC,
+ UAPI_DEF_IS_SUPPORTED_DEV_FN,
+};
+
+enum uapi_definition_scope {
+ UAPI_SCOPE_OBJECT = 1,
+ UAPI_SCOPE_METHOD = 2,
+};
+
+struct uapi_definition {
+ u8 kind;
+ u8 scope;
+ union {
+ struct {
+ u16 object_id;
+ } object_start;
+ struct {
+ u16 command_num;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+ } write;
+ };
+
+ union {
+ bool (*func_is_supported)(struct ib_device *device);
+ int (*func_write)(struct uverbs_attr_bundle *attrs);
+ const struct uapi_definition *chain;
+ const struct uverbs_object_def *chain_obj_tree;
+ size_t needs_fn_offset;
+ };
+};
+
+/* Define things connected to object_id */
+#define DECLARE_UVERBS_OBJECT(_object_id, ...) \
+ { \
+ .kind = UAPI_DEF_OBJECT_START, \
+ .object_start = { .object_id = _object_id }, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 0, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 1, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/*
+ * Object is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/*
+ * Method is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_METHOD, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/* Call a function to determine if the entire object is supported or not */
+#define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \
+ .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \
+ }
+
+/* Include another struct uapi_definition in this one */
+#define UAPI_DEF_CHAIN(_def_var) \
+ { \
+ .kind = UAPI_DEF_CHAIN, .chain = _def_var, \
+ }
+
+/* Temporary until the tree base description is replaced */
+#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...) \
+ { \
+ .kind = UAPI_DEF_CHAIN_OBJ_TREE, \
+ .object_start = { .object_id = _object_enum }, \
+ .chain_obj_tree = _object_ptr, \
+ }, \
+ ##__VA_ARGS__
+#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \
+ UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \
+ ##__VA_ARGS__)
+
+/*
+ * =======================================
+ * Attribute Specifications
+ * =======================================
+ */
+
+#define UVERBS_ATTR_SIZE(_min_len, _len) \
+ .u.ptr.min_len = _min_len, .u.ptr.len = _len
+
+#define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0)
+
+/*
+ * Specifies a uapi structure that cannot be extended. The user must always
+ * supply the whole structure and nothing more. The structure must be declared
+ * in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_TYPE(_type) \
+ .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
+/*
+ * Specifies a uapi structure where the user must provide at least up to
+ * member 'last'. Anything after last and up until the end of the structure
+ * can be non-zero, anything longer than the end of the structure must be
+ * zero. The structure must be declared in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_STRUCT(_type, _last) \
+ .zero_trailing = 1, \
+ UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \
+ sizeof(_type))
+/*
+ * Specifies at least min_len bytes must be passed in, but the amount can be
+ * larger, up to the protocol maximum size. No check for zeroing is done.
+ */
+#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
+
+/* Must be used in the '...' of any UVERBS_ATTR */
+#define UA_ALLOC_AND_COPY .alloc_and_copy = 1
+#define UA_MANDATORY .mandatory = 1
+#define UA_OPTIONAL .mandatory = 0
+
+/*
+ * min_len must be bigger than 0 and _max_len must be smaller than 4095. Only
+ * READ\WRITE accesses are supported.
+ */
+#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
+ ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = (_attr_id) + \
+ BUILD_BUG_ON_ZERO((_min_len) == 0 || \
+ (_max_len) > \
+ PAGE_SIZE / sizeof(void *) || \
+ (_min_len) > (_max_len) || \
+ (_access) == UVERBS_ACCESS_NEW || \
+ (_access) == UVERBS_ACCESS_DESTROY), \
+ .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \
+ .u2.objs_arr.obj_type = _idr_type, \
+ .u2.objs_arr.access = _access, \
+ .u2.objs_arr.min_len = _min_len, \
+ .u2.objs_arr.max_len = _max_len, \
+ __VA_ARGS__ } })
+
+/*
+ * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted,
+ * the user must validate the type of the uobject instead.
+ */
+#define UVERBS_IDR_ANY_OBJECT 0xFFFF
+
+#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_IDR, \
+ .u.obj.obj_type = _idr_type, \
+ .u.obj.access = _access, \
+ __VA_ARGS__ } })
+
+#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = (_attr_id) + \
+ BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \
+ (_access) != UVERBS_ACCESS_READ), \
+ .attr = { .type = UVERBS_ATTR_TYPE_FD, \
+ .u.obj.obj_type = _fd_type, \
+ .u.obj.access = _access, \
+ __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \
+ _type, \
+ __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \
+ _type, \
+ __VA_ARGS__ } })
+
+/* _enum_arry should be a 'static const union uverbs_attr_spec[]' */
+#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \
+ .u2.enum_def.ids = _enum_arr, \
+ .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \
+ __VA_ARGS__ }, \
+ })
+
+/* An input value that is a member in the enum _enum_type. */
+#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \
+ UVERBS_ATTR_PTR_IN( \
+ _attr_id, \
+ UVERBS_ATTR_SIZE( \
+ sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \
+ sizeof(u64)), \
+ __VA_ARGS__)
+
+/*
+ * An input value that is a bitwise combination of values of _enum_type.
+ * This permits the flag value to be passed as either a u32 or u64, it must
+ * be retrieved via uverbs_get_flag().
+ */
+#define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...) \
+ UVERBS_ATTR_PTR_IN( \
+ _attr_id, \
+ UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO( \
+ !sizeof(_enum_type *)), \
+ sizeof(u64)), \
+ __VA_ARGS__)
+
+/*
+ * This spec is used in order to pass information to the hardware driver in a
+ * legacy way. Every verb that could get driver specific data should get this
+ * spec.
+ */
+#define UVERBS_ATTR_UHW() \
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \
+ UVERBS_ATTR_MIN_SIZE(0), \
+ UA_OPTIONAL, \
+ .is_udata = 1), \
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \
+ UVERBS_ATTR_MIN_SIZE(0), \
+ UA_OPTIONAL, \
+ .is_udata = 1)
+
+/* =================================================
+ * Parsing infrastructure
+ * =================================================
+ */
+
+
+struct uverbs_ptr_attr {
+ /*
+ * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is
+ * used.
+ */
+ union {
+ void *ptr;
+ u64 data;
+ };
+ u16 len;
+ u16 uattr_idx;
+ u8 enum_id;
+};
+
+struct uverbs_obj_attr {
+ struct ib_uobject *uobject;
+ const struct uverbs_api_attr *attr_elm;
+};
+
+struct uverbs_objs_arr_attr {
+ struct ib_uobject **uobjects;
+ u16 len;
+};
+
+struct uverbs_attr {
+ union {
+ struct uverbs_ptr_attr ptr_attr;
+ struct uverbs_obj_attr obj_attr;
+ struct uverbs_objs_arr_attr objs_arr_attr;
+ };
+};
+
+struct uverbs_attr_bundle {
+ struct ib_udata driver_udata;
+ struct ib_udata ucore;
+ struct ib_uverbs_file *ufile;
+ struct ib_ucontext *context;
+ DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
+ struct uverbs_attr attrs[0];
+};
+
+static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle,
+ unsigned int idx)
+{
+ return test_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+ attrs_bundle->attr_present);
+}
+
+/**
+ * rdma_udata_to_drv_context - Helper macro to get the driver's context out of
+ * ib_udata which is embedded in uverbs_attr_bundle.
+ *
+ * If udata is not NULL this cannot fail. Otherwise a NULL udata will result
+ * in a NULL ucontext pointer, as a safety precaution. Callers should be using
+ * 'udata' to determine if the driver call is in user or kernel mode, not
+ * 'ucontext'.
+ *
+ */
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
+ (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \
+ driver_udata) \
+ ->context, \
+ drv_dev_struct, member) : \
+ (drv_dev_struct *)NULL)
+
+#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
+
+static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
+ u16 idx)
+{
+ if (!uverbs_attr_is_valid(attrs_bundle, idx))
+ return ERR_PTR(-ENOENT);
+
+ return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))];
+}
+
+static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle,
+ u16 idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ return attr->ptr_attr.enum_id;
+}
+
+static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle,
+ u16 idx)
+{
+ const struct uverbs_attr *attr;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr))
+ return ERR_CAST(attr);
+
+ return attr->obj_attr.uobject->object;
+}
+
+static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_attr_bundle *attrs_bundle,
+ u16 idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+ if (IS_ERR(attr))
+ return ERR_CAST(attr);
+
+ return attr->obj_attr.uobject;
+}
+
+static inline int
+uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ return attr->ptr_attr.len;
+}
+
+/*
+ * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr
+ * attribute.
+ * @attrs: The attribute bundle
+ * @idx: The ID of the attribute
+ * @elem_size: The size of the element in the array
+ */
+static inline int
+uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx,
+ size_t elem_size)
+{
+ int size = uverbs_attr_get_len(attrs, idx);
+
+ if (size < 0)
+ return size;
+
+ if (size % elem_size)
+ return -EINVAL;
+
+ return size / elem_size;
+}
+
+/**
+ * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for
+ * UVERBS_ATTR_TYPE_IDRS_ARRAY.
+ * @arr: Returned pointer to array of pointers for uobjects or NULL if
+ * the attribute isn't provided.
+ *
+ * Return: The array length or 0 if no attribute was provided.
+ */
+static inline int uverbs_attr_get_uobjs_arr(
+ const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
+ struct ib_uobject ***arr)
+{
+ const struct uverbs_attr *attr =
+ uverbs_attr_get(attrs_bundle, attr_idx);
+
+ if (IS_ERR(attr)) {
+ *arr = NULL;
+ return 0;
+ }
+
+ *arr = attr->objs_arr_attr.uobjects;
+
+ return attr->objs_arr_attr.len;
+}
+
+static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
+{
+ return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
+}
+
+static inline void *uverbs_attr_get_alloced_ptr(
+ const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+ struct uverbs_attr *attr = __DECONST(struct uverbs_attr *, uverbs_attr_get(attrs_bundle, idx));
+
+ if (IS_ERR(attr))
+ return (void *)attr;
+
+ return uverbs_attr_ptr_is_inline(attr) ?
+ (void *)&attr->ptr_attr.data : attr->ptr_attr.ptr;
+}
+
+static inline int _uverbs_copy_from(void *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx,
+ size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ /*
+ * Validation ensures attr->ptr_attr.len >= size. If the caller is
+ * using UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO then it must call
+ * uverbs_copy_from_or_zero.
+ */
+ if (unlikely(size < attr->ptr_attr.len))
+ return -EINVAL;
+
+ if (uverbs_attr_ptr_is_inline(attr))
+ memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len);
+ else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+ attr->ptr_attr.len))
+ return -EFAULT;
+
+ return 0;
+}
+
+static inline int _uverbs_copy_from_or_zero(void *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx,
+ size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+ size_t min_size;
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ min_size = min_t(size_t, size, attr->ptr_attr.len);
+
+ if (uverbs_attr_ptr_is_inline(attr))
+ memcpy(to, &attr->ptr_attr.data, min_size);
+ else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+ min_size))
+ return -EFAULT;
+
+ if (size > min_size)
+ memset((char *)to + min_size, 0, size - min_size);
+
+ return 0;
+}
+
+#define uverbs_copy_from(to, attrs_bundle, idx) \
+ _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to))
+
+#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \
+ _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
+
+static inline struct ib_ucontext *
+ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs)
+{
+ return ib_uverbs_get_ucontext_file(attrs->ufile);
+}
+
+#if 1 /* IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) */
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits);
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits);
+int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx,
+ const void *from, size_t size);
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+ gfp_t flags);
+
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+ size_t size)
+{
+ return _uverbs_alloc(bundle, size, GFP_KERNEL);
+}
+
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+ size_t size)
+{
+ return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
+}
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val);
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size);
+#else
+static inline int
+uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ return -EINVAL;
+}
+static inline int
+uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ return -EINVAL;
+}
+static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, const void *from, size_t size)
+{
+ return -EINVAL;
+}
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+ size_t size)
+{
+ return ERR_PTR(-EINVAL);
+}
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+ size_t size)
+{
+ return ERR_PTR(-EINVAL);
+}
+static inline int
+_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ return -EINVAL;
+}
+static inline int
+uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size)
+{
+ return -EINVAL;
+}
+#endif
+
+#define uverbs_get_const(_to, _attrs_bundle, _idx) \
+ ({ \
+ s64 _val; \
+ int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), NULL); \
+ (*_to) = _val; \
+ _ret; \
+ })
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \
+ ({ \
+ s64 _val; \
+ s64 _def_val = _default; \
+ int _ret = \
+ _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), &_def_val); \
+ (*_to) = _val; \
+ _ret; \
+ })
+#endif
diff --git a/sys/ofed/include/rdma/uverbs_named_ioctl.h b/sys/ofed/include/rdma/uverbs_named_ioctl.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/rdma/uverbs_named_ioctl.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UVERBS_NAMED_IOCTL_
+#define _UVERBS_NAMED_IOCTL_
+
+#include <rdma/uverbs_ioctl.h>
+
+#ifndef UVERBS_MODULE_NAME
+#error "Please #define UVERBS_MODULE_NAME before including rdma/uverbs_named_ioctl.h"
+#endif
+
+#define _UVERBS_PASTE(x, y) x ## y
+#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y)
+#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
+#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
+#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
+
+/* These are static so they do not need to be qualified */
+#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
+#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id
+
+#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \
+ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
+ _method_id)[] = { __VA_ARGS__ }; \
+ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
+ .id = _method_id, \
+ .handler = UVERBS_HANDLER(_method_id), \
+ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
+ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
+ }
+
+/* Create a standard destroy method using the default handler. The handle_attr
+ * argument must be the attribute specifying the handle to destroy, the
+ * default handler does not support any other attributes.
+ */
+#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr) \
+ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
+ _method_id)[] = { _handle_attr }; \
+ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
+ .id = _method_id, \
+ .handler = uverbs_destroy_def_handler, \
+ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
+ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
+ }
+
+#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \
+ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
+ _object_id)[] = { __VA_ARGS__ }; \
+ static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \
+ .id = _object_id, \
+ .type_attrs = &_type_attrs, \
+ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
+ .methods = &UVERBS_OBJECT_METHODS(_object_id) \
+ }
+
+/*
+ * Declare global methods. These still have a unique object_id because we
+ * identify all uapi methods with a (object,method) tuple. However, they have
+ * no type pointer.
+ */
+#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \
+ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
+ _object_id)[] = { __VA_ARGS__ }; \
+ static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \
+ .id = _object_id, \
+ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
+ .methods = &UVERBS_OBJECT_METHODS(_object_id) \
+ }
+
+/* Used by drivers to declare a complete parsing tree for new methods
+ */
+#define ADD_UVERBS_METHODS(_name, _object_id, ...) \
+ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
+ _object_id)[] = { __VA_ARGS__ }; \
+ static const struct uverbs_object_def _name = { \
+ .id = _object_id, \
+ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
+ .methods = &UVERBS_OBJECT_METHODS(_object_id) \
+ };
+
+/* Used by drivers to declare a complete parsing tree for a single method that
+ * differs only in having additional driver specific attributes.
+ */
+#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) \
+ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
+ _method_id)[] = { __VA_ARGS__ }; \
+ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
+ .id = _method_id, \
+ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
+ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
+ }; \
+ ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id))
+
+#endif
diff --git a/sys/ofed/include/rdma/uverbs_std_types.h b/sys/ofed/include/rdma/uverbs_std_types.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/rdma/uverbs_std_types.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UVERBS_STD_TYPES__
+#define _UVERBS_STD_TYPES__
+
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/ib_user_ioctl_verbs.h>
+
+/* Returns _id, or causes a compile error if _id is not a u32.
+ *
+ * The uobj APIs should only be used with the write based uAPI to access
+ * object IDs. The write API must use a u32 for the object handle, which is
+ * checked by this macro.
+ */
+#define _uobj_check_id(_id) ({ CTASSERT(sizeof(_id) == sizeof(u32)); (_id); })
+
+#define uobj_get_type(_attrs, _object) \
+ uapi_get_object((_attrs)->ufile->device->uapi, _object)
+
+#define uobj_get_read(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+ _uobj_check_id(_id), UVERBS_LOOKUP_READ, \
+ _attrs)
+
+#define ufd_get_read(_type, _fdnum, _attrs) ({ \
+ CTASSERT(sizeof(_fdnum) == sizeof(s32)); \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+ (_fdnum), \
+ UVERBS_LOOKUP_READ, _attrs); \
+})
+
+static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
+{
+ if (IS_ERR(uobj))
+ return NULL;
+ return uobj->object;
+}
+#define uobj_get_obj_read(_object, _type, _id, _attrs) \
+ ((struct ib_##_object *)_uobj_get_obj_read( \
+ uobj_get_read(_type, _id, _attrs)))
+
+#define uobj_get_write(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+ _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \
+ _attrs)
+
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+ struct uverbs_attr_bundle *attrs);
+#define uobj_perform_destroy(_type, _id, _attrs) \
+ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \
+ _uobj_check_id(_id), _attrs)
+
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+ u32 id, struct uverbs_attr_bundle *attrs);
+
+#define uobj_get_destroy(_type, _id, _attrs) \
+ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \
+ _attrs)
+
+static inline void uobj_put_destroy(struct ib_uobject *uobj)
+{
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+}
+
+static inline void uobj_put_read(struct ib_uobject *uobj)
+{
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
+}
+
+#define uobj_put_obj_read(_obj) \
+ uobj_put_read((_obj)->uobject)
+
+static inline void uobj_put_write(struct ib_uobject *uobj)
+{
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+}
+
+static inline void uobj_alloc_abort(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ rdma_alloc_abort_uobject(uobj, attrs);
+}
+
+static inline struct ib_uobject *
+__uobj_alloc(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
+{
+ struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs);
+
+ if (!IS_ERR(uobj))
+ *ib_dev = attrs->context->device;
+ return uobj;
+}
+
+#define uobj_alloc(_type, _attrs, _ib_dev) \
+ __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev)
+
+static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
+ struct ib_uobject *uobj,
+ struct ib_device *ib_dev,
+ enum ib_flow_action_type type)
+{
+ atomic_set(&action->usecnt, 0);
+ action->device = ib_dev;
+ action->type = type;
+ action->uobject = uobj;
+ uobj->object = action;
+}
+
+struct ib_uflow_resources {
+ size_t max;
+ size_t num;
+ size_t collection_num;
+ size_t counters_num;
+ struct ib_counters **counters;
+ struct ib_flow_action **collection;
+};
+
+struct ib_uflow_object {
+ struct ib_uobject uobject;
+ struct ib_uflow_resources *resources;
+};
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ enum ib_flow_spec_type type,
+ void *ibobj);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+ struct ib_qp *qp, struct ib_device *device,
+ struct ib_uflow_resources *uflow_res)
+{
+ struct ib_uflow_object *uflow;
+
+ uobj->object = ibflow;
+ ibflow->uobject = uobj;
+
+ if (qp) {
+ atomic_inc(&qp->usecnt);
+ ibflow->qp = qp;
+ }
+
+ ibflow->device = device;
+ uflow = container_of(uobj, typeof(*uflow), uobject);
+ uflow->resources = uflow_res;
+}
+
+struct uverbs_api_object {
+ const struct uverbs_obj_type *type_attrs;
+ const struct uverbs_obj_type_class *type_class;
+ u8 disabled:1;
+ u32 id;
+};
+
+static inline u32 uobj_get_object_id(struct ib_uobject *uobj)
+{
+ return uobj->uapi_object->id;
+}
+
+#endif
+
diff --git a/sys/ofed/include/rdma/uverbs_types.h b/sys/ofed/include/rdma/uverbs_types.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/rdma/uverbs_types.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UVERBS_TYPES_
+#define _UVERBS_TYPES_
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <rdma/ib_verbs.h>
+
+struct uverbs_obj_type;
+struct uverbs_api_object;
+struct uverbs_attr_bundle;
+struct ib_uverbs_file;
+
+enum rdma_remove_reason;
+
+enum rdma_lookup_mode {
+ UVERBS_LOOKUP_READ,
+ UVERBS_LOOKUP_WRITE,
+ /*
+ * Destroy is like LOOKUP_WRITE, except that the uobject is not
+ * locked. uobj_destroy is used to convert a LOOKUP_DESTROY lock into
+ * a LOOKUP_WRITE lock.
+ */
+ UVERBS_LOOKUP_DESTROY,
+};
+
+/*
+ * The following sequences are valid:
+ * Success flow:
+ * alloc_begin
+ * alloc_commit
+ * [..]
+ * Access flow:
+ * lookup_get(exclusive=false) & uverbs_try_lock_object
+ * lookup_put(exclusive=false) via rdma_lookup_put_uobject
+ * Destruction flow:
+ * lookup_get(exclusive=true) & uverbs_try_lock_object
+ * remove_commit
+ * remove_handle (optional)
+ * lookup_put(exclusive=true) via rdma_lookup_put_uobject
+ *
+ * Allocate Error flow #1
+ * alloc_begin
+ * alloc_abort
+ * Allocate Error flow #2
+ * alloc_begin
+ * remove_commit
+ * alloc_abort
+ * Allocate Error flow #3
+ * alloc_begin
+ * alloc_commit (fails)
+ * remove_commit
+ * alloc_abort
+ *
+ * In all cases the caller must hold the ufile kref until alloc_commit or
+ * alloc_abort returns.
+ */
+struct uverbs_obj_type_class {
+ struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs);
+ /* This consumes the kref on uobj */
+ void (*alloc_commit)(struct ib_uobject *uobj);
+ /* This does not consume the kref on uobj */
+ void (*alloc_abort)(struct ib_uobject *uobj);
+
+ struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode);
+ void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
+ /* This does not consume the kref on uobj */
+ int __must_check (*destroy_hw)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
+ void (*remove_handle)(struct ib_uobject *uobj);
+};
+
+struct uverbs_obj_type {
+ const struct uverbs_obj_type_class * const type_class;
+ size_t obj_size;
+};
+
+/*
+ * Objects type classes which support a detach state (object is still alive but
+ * it's not attached to any context need to make sure:
+ * (a) no call through to a driver after a detach is called
+ * (b) detach isn't called concurrently with context_cleanup
+ */
+
+struct uverbs_obj_idr_type {
+ /*
+ * In idr based objects, uverbs_obj_type_class points to a generic
+ * idr operations. In order to specialize the underlying types (e.g. CQ,
+ * QPs, etc.), we add destroy_object specific callbacks.
+ */
+ struct uverbs_obj_type type;
+
+ /* Free driver resources from the uobject, make the driver uncallable,
+ * and move the uobject to the detached state. If the object was
+ * destroyed by the user's request, a failure should leave the uobject
+ * completely unchanged.
+ */
+ int __must_check (*destroy_object)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
+};
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode,
+ struct uverbs_attr_bundle *attrs);
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode);
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs);
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs);
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs);
+
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless if this uobject is still alive in the context's objects
+ * repository. Objects are put via uverbs_uobject_put.
+ */
+static inline void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+ kref_get(&uobject->ref);
+}
+void uverbs_uobject_put(struct ib_uobject *uobject);
+
+struct uverbs_obj_fd_type {
+ /*
+ * In fd based objects, uverbs_obj_type_ops points to generic
+ * fd operations. In order to specialize the underlying types (e.g.
+ * completion_channel), we use fops, name and flags for fd creation.
+ * destroy_object is called when the uobject is to be destroyed,
+ * because the driver is removed or the FD is closed.
+ */
+ struct uverbs_obj_type type;
+ int (*destroy_object)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why);
+ const struct file_operations *fops;
+ const char *name;
+ int flags;
+};
+
+extern const struct uverbs_obj_type_class uverbs_idr_class;
+extern const struct uverbs_obj_type_class uverbs_fd_class;
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);
+
+#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
+ sizeof(char))
+#define UVERBS_TYPE_ALLOC_FD(_obj_size, _destroy_object, _fops, _name, _flags) \
+ ((&((const struct uverbs_obj_fd_type) \
+ {.type = { \
+ .type_class = &uverbs_fd_class, \
+ .obj_size = (_obj_size) + \
+ UVERBS_BUILD_BUG_ON((_obj_size) < \
+ sizeof(struct ib_uobject)), \
+ }, \
+ .destroy_object = _destroy_object, \
+ .fops = _fops, \
+ .name = _name, \
+ .flags = _flags}))->type)
+#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object) \
+ ((&((const struct uverbs_obj_idr_type) \
+ {.type = { \
+ .type_class = &uverbs_idr_class, \
+ .obj_size = (_size) + \
+ UVERBS_BUILD_BUG_ON((_size) < \
+ sizeof(struct ib_uobject)) \
+ }, \
+ .destroy_object = _destroy_object,}))->type)
+#define UVERBS_TYPE_ALLOC_IDR(_destroy_object) \
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), \
+ _destroy_object)
+
+#endif
diff --git a/sys/ofed/include/uapi/rdma/ib_user_ioctl_cmds.h b/sys/ofed/include/uapi/rdma/ib_user_ioctl_cmds.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_IOCTL_CMDS_H
+#define IB_USER_IOCTL_CMDS_H
+
+#define UVERBS_ID_NS_MASK 0xF000
+#define UVERBS_ID_NS_SHIFT 12
+
+#define UVERBS_UDATA_DRIVER_DATA_NS 1
+#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT)
+
+enum uverbs_default_objects {
+ UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */
+ UVERBS_OBJECT_PD,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_OBJECT_CQ,
+ UVERBS_OBJECT_QP,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_OBJECT_AH,
+ UVERBS_OBJECT_MR,
+ UVERBS_OBJECT_MW,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_OBJECT_WQ,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_OBJECT_DM,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_OBJECT_ASYNC_EVENT,
+};
+
+enum {
+ UVERBS_ATTR_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG,
+ UVERBS_ATTR_UHW_OUT,
+};
+
+enum uverbs_methods_device {
+ UVERBS_METHOD_INVOKE_WRITE,
+ UVERBS_METHOD_INFO_HANDLES,
+ UVERBS_METHOD_QUERY_PORT,
+ UVERBS_METHOD_GET_CONTEXT,
+};
+
+enum uverbs_attrs_invoke_write_cmd_attr_ids {
+ UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_CORE_OUT,
+ UVERBS_ATTR_WRITE_CMD,
+};
+
+enum uverbs_attrs_query_port_cmd_attr_ids {
+ UVERBS_ATTR_QUERY_PORT_PORT_NUM,
+ UVERBS_ATTR_QUERY_PORT_RESP,
+};
+
+enum uverbs_attrs_get_context_attr_ids {
+ UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+};
+
+enum uverbs_attrs_create_cq_cmd_attr_ids {
+ UVERBS_ATTR_CREATE_CQ_HANDLE,
+ UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+ UVERBS_ATTR_CREATE_CQ_FLAGS,
+ UVERBS_ATTR_CREATE_CQ_RESP_CQE,
+};
+
+enum uverbs_attrs_destroy_cq_cmd_attr_ids {
+ UVERBS_ATTR_DESTROY_CQ_HANDLE,
+ UVERBS_ATTR_DESTROY_CQ_RESP,
+};
+
+enum uverbs_attrs_create_flow_action_esp {
+ UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+};
+
+enum uverbs_attrs_modify_flow_action_esp {
+ UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE =
+ UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+};
+
+enum uverbs_attrs_destroy_flow_action_esp {
+ UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+};
+
+enum uverbs_methods_cq {
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_METHOD_CQ_DESTROY,
+};
+
+enum uverbs_methods_actions_flow_action_ops {
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+};
+
+enum uverbs_attrs_alloc_dm_cmd_attr_ids {
+ UVERBS_ATTR_ALLOC_DM_HANDLE,
+ UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+};
+
+enum uverbs_attrs_free_dm_cmd_attr_ids {
+ UVERBS_ATTR_FREE_DM_HANDLE,
+};
+
+enum uverbs_methods_dm {
+ UVERBS_METHOD_DM_ALLOC,
+ UVERBS_METHOD_DM_FREE,
+};
+
+enum uverbs_attrs_reg_dm_mr_cmd_attr_ids {
+ UVERBS_ATTR_REG_DM_MR_HANDLE,
+ UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+ UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+};
+
+enum uverbs_methods_mr {
+ UVERBS_METHOD_DM_MR_REG,
+ UVERBS_METHOD_MR_DESTROY,
+ UVERBS_METHOD_ADVISE_MR,
+};
+
+enum uverbs_attrs_mr_destroy_ids {
+ UVERBS_ATTR_DESTROY_MR_HANDLE,
+};
+
+enum uverbs_attrs_advise_mr_cmd_attr_ids {
+ UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
+ UVERBS_ATTR_ADVISE_MR_ADVICE,
+ UVERBS_ATTR_ADVISE_MR_FLAGS,
+ UVERBS_ATTR_ADVISE_MR_SGE_LIST,
+};
+
+enum uverbs_attrs_create_counters_cmd_attr_ids {
+ UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+};
+
+enum uverbs_attrs_destroy_counters_cmd_attr_ids {
+ UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+};
+
+enum uverbs_attrs_read_counters_cmd_attr_ids {
+ UVERBS_ATTR_READ_COUNTERS_HANDLE,
+ UVERBS_ATTR_READ_COUNTERS_BUFF,
+ UVERBS_ATTR_READ_COUNTERS_FLAGS,
+};
+
+enum uverbs_methods_actions_counters_ops {
+ UVERBS_METHOD_COUNTERS_CREATE,
+ UVERBS_METHOD_COUNTERS_DESTROY,
+ UVERBS_METHOD_COUNTERS_READ,
+};
+
+enum uverbs_attrs_info_handles_id {
+ UVERBS_ATTR_INFO_OBJECT_ID,
+ UVERBS_ATTR_INFO_TOTAL_HANDLES,
+ UVERBS_ATTR_INFO_HANDLES_LIST,
+};
+
+enum uverbs_methods_pd {
+ UVERBS_METHOD_PD_DESTROY,
+};
+
+enum uverbs_attrs_pd_destroy_ids {
+ UVERBS_ATTR_DESTROY_PD_HANDLE,
+};
+
+enum uverbs_methods_mw {
+ UVERBS_METHOD_MW_DESTROY,
+};
+
+enum uverbs_attrs_mw_destroy_ids {
+ UVERBS_ATTR_DESTROY_MW_HANDLE,
+};
+
+enum uverbs_methods_xrcd {
+ UVERBS_METHOD_XRCD_DESTROY,
+};
+
+enum uverbs_attrs_xrcd_destroy_ids {
+ UVERBS_ATTR_DESTROY_XRCD_HANDLE,
+};
+
+enum uverbs_methods_ah {
+ UVERBS_METHOD_AH_DESTROY,
+};
+
+enum uverbs_attrs_ah_destroy_ids {
+ UVERBS_ATTR_DESTROY_AH_HANDLE,
+};
+
+enum uverbs_methods_rwq_ind_tbl {
+ UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
+};
+
+enum uverbs_attrs_rwq_ind_tbl_destroy_ids {
+ UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
+};
+
+enum uverbs_methods_flow {
+ UVERBS_METHOD_FLOW_DESTROY,
+};
+
+enum uverbs_attrs_flow_destroy_ids {
+ UVERBS_ATTR_DESTROY_FLOW_HANDLE,
+};
+
+enum uverbs_method_async_event {
+ UVERBS_METHOD_ASYNC_EVENT_ALLOC,
+};
+
+enum uverbs_attrs_async_event_create {
+ UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
+};
+
+#endif
diff --git a/sys/ofed/include/uapi/rdma/ib_user_ioctl_verbs.h b/sys/ofed/include/uapi/rdma/ib_user_ioctl_verbs.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2017-2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_IOCTL_VERBS_H
+#define IB_USER_IOCTL_VERBS_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_verbs.h>
+
+#ifndef RDMA_UAPI_PTR
+#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name
+#endif
+
+#define IB_UVERBS_ACCESS_OPTIONAL_FIRST (1 << 20)
+#define IB_UVERBS_ACCESS_OPTIONAL_LAST (1 << 29)
+
+enum ib_uverbs_core_support {
+ IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS = 1 << 0,
+};
+
+enum ib_uverbs_access_flags {
+ IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0,
+ IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1,
+ IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2,
+ IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3,
+ IB_UVERBS_ACCESS_MW_BIND = 1 << 4,
+ IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
+ IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
+ IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
+
+ IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST,
+ IB_UVERBS_ACCESS_OPTIONAL_RANGE =
+ ((IB_UVERBS_ACCESS_OPTIONAL_LAST << 1) - 1) &
+ ~(IB_UVERBS_ACCESS_OPTIONAL_FIRST - 1)
+};
+
+enum ib_uverbs_query_port_cap_flags {
+ IB_UVERBS_PCF_SM = 1 << 1,
+ IB_UVERBS_PCF_NOTICE_SUP = 1 << 2,
+ IB_UVERBS_PCF_TRAP_SUP = 1 << 3,
+ IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4,
+ IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5,
+ IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6,
+ IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7,
+ IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8,
+ IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9,
+ IB_UVERBS_PCF_SM_DISABLED = 1 << 10,
+ IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11,
+ IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14,
+ IB_UVERBS_PCF_CM_SUP = 1 << 16,
+ IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17,
+ IB_UVERBS_PCF_REINIT_SUP = 1 << 18,
+ IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19,
+ IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20,
+ IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21,
+ IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22,
+ IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23,
+ IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24,
+ IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25,
+ /*
+ * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and
+ * is inaccessible
+ */
+ IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+ IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+ IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+ IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30,
+ IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31,
+
+ /* NOTE this is an internal flag, not an IBA flag */
+ IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26,
+};
+
+enum ib_uverbs_query_port_flags {
+ IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0,
+};
+
+enum ib_uverbs_flow_action_esp_keymat {
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
+};
+
+enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo {
+ IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ,
+};
+
+struct ib_uverbs_flow_action_esp_keymat_aes_gcm {
+ __aligned_u64 iv;
+ __u32 iv_algo; /* Use enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo */
+
+ __u32 salt;
+ __u32 icv_len;
+
+ __u32 key_len;
+ __u32 aes_key[256 / 32];
+};
+
+enum ib_uverbs_flow_action_esp_replay {
+ IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE,
+ IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP,
+};
+
+struct ib_uverbs_flow_action_esp_replay_bmp {
+ __u32 size;
+};
+
+enum ib_uverbs_flow_action_esp_flags {
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_INLINE_CRYPTO = 0UL << 0, /* Default */
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_FULL_OFFLOAD = 1UL << 0,
+
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TUNNEL = 0UL << 1, /* Default */
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TRANSPORT = 1UL << 1,
+
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_DECRYPT = 0UL << 2, /* Default */
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT = 1UL << 2,
+
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW = 1UL << 3,
+};
+
+struct ib_uverbs_flow_action_esp_encap {
+ /* This struct represents a list of pointers to flow_xxxx_filter that
+ * encapsulates the payload in ESP tunnel mode.
+ */
+ RDMA_UAPI_PTR(void *, val_ptr); /* pointer to a flow_xxxx_filter */
+ RDMA_UAPI_PTR(struct ib_uverbs_flow_action_esp_encap *, next_ptr);
+ __u16 len; /* Len of the filter struct val_ptr points to */
+ __u16 type; /* Use flow_spec_type enum */
+};
+
+struct ib_uverbs_flow_action_esp {
+ __u32 spi;
+ __u32 seq;
+ __u32 tfc_pad;
+ __u32 flags;
+ __aligned_u64 hard_limit_pkts;
+};
+
+enum ib_uverbs_read_counters_flags {
+ /* prefer read values from driver cache */
+ IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0,
+};
+
+enum ib_uverbs_advise_mr_advice {
+ IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH,
+ IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE,
+};
+
+enum ib_uverbs_advise_mr_flag {
+ IB_UVERBS_ADVISE_MR_FLAG_FLUSH = 1 << 0,
+};
+
+struct ib_uverbs_query_port_resp_ex {
+ struct ib_uverbs_query_port_resp legacy_resp;
+ __u16 port_cap_flags2;
+ __u8 reserved[6];
+};
+
+enum rdma_driver_id {
+ RDMA_DRIVER_UNKNOWN,
+ RDMA_DRIVER_MLX5,
+ RDMA_DRIVER_MLX4,
+ RDMA_DRIVER_CXGB3,
+ RDMA_DRIVER_CXGB4,
+ RDMA_DRIVER_MTHCA,
+ RDMA_DRIVER_BNXT_RE,
+ RDMA_DRIVER_OCRDMA,
+ RDMA_DRIVER_NES,
+ RDMA_DRIVER_I40IW,
+ RDMA_DRIVER_VMW_PVRDMA,
+ RDMA_DRIVER_QEDR,
+ RDMA_DRIVER_HNS,
+ RDMA_DRIVER_USNIC,
+ RDMA_DRIVER_RXE,
+ RDMA_DRIVER_HFI1,
+ RDMA_DRIVER_QIB,
+ RDMA_DRIVER_EFA,
+ RDMA_DRIVER_SIW,
+ RDMA_DRIVER_QLNXR,
+};
+
+#endif
diff --git a/sys/ofed/include/uapi/rdma/ib_user_mad.h b/sys/ofed/include/uapi/rdma/ib_user_mad.h
--- a/sys/ofed/include/uapi/rdma/ib_user_mad.h
+++ b/sys/ofed/include/uapi/rdma/ib_user_mad.h
@@ -38,13 +38,7 @@
#ifndef IB_USER_MAD_H
#define IB_USER_MAD_H
-#ifdef _KERNEL
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#else
-#include <infiniband/types.h>
-#include <sys/ioccom.h>
-#endif
+#include <rdma/rdma_user_ioctl.h>
/*
* Increment this value if any changes that break userspace ABI
@@ -239,16 +233,4 @@
__u8 reserved[3];
};
-#define IB_IOCTL_MAGIC 0x1b
-
-#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \
- struct ib_user_mad_reg_req)
-
-#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32)
-
-#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3)
-
-#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \
- struct ib_user_mad_reg_req2)
-
#endif /* IB_USER_MAD_H */
diff --git a/sys/ofed/include/uapi/rdma/ib_user_verbs.h b/sys/ofed/include/uapi/rdma/ib_user_verbs.h
--- a/sys/ofed/include/uapi/rdma/ib_user_verbs.h
+++ b/sys/ofed/include/uapi/rdma/ib_user_verbs.h
@@ -53,7 +53,7 @@
#define IB_USER_VERBS_ABI_VERSION 6
#define IB_USER_VERBS_CMD_THRESHOLD 50
-enum {
+enum ib_uverbs_write_cmds {
IB_USER_VERBS_CMD_GET_CONTEXT,
IB_USER_VERBS_CMD_QUERY_DEVICE,
IB_USER_VERBS_CMD_QUERY_PORT,
@@ -101,13 +101,15 @@
IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP,
IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
IB_USER_VERBS_EX_CMD_CREATE_WQ,
IB_USER_VERBS_EX_CMD_MODIFY_WQ,
IB_USER_VERBS_EX_CMD_DESTROY_WQ,
IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
- IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+ IB_USER_VERBS_EX_CMD_MODIFY_CQ
};
/*
@@ -122,13 +124,19 @@
*/
struct ib_uverbs_async_event_desc {
- __u64 element;
+ __aligned_u64 element;
__u32 event_type; /* enum ib_event_type */
__u32 reserved;
};
struct ib_uverbs_comp_event_desc {
- __u64 cq_handle;
+ __aligned_u64 cq_handle;
+};
+
+struct ib_uverbs_cq_moderation_caps {
+ __u16 max_cq_moderation_count;
+ __u16 max_cq_moderation_period;
+ __u32 reserved;
};
/*
@@ -140,10 +148,7 @@
*/
#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
-#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
-#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
-
-#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
+#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80000000u
struct ib_uverbs_cmd_hdr {
__u32 command;
@@ -152,33 +157,34 @@
};
struct ib_uverbs_ex_cmd_hdr {
- __u64 response;
+ __aligned_u64 response;
__u16 provider_in_words;
__u16 provider_out_words;
__u32 cmd_hdr_reserved;
};
struct ib_uverbs_get_context {
- __u64 response;
- __u64 driver_data[0];
+ __aligned_u64 response;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_get_context_resp {
__u32 async_fd;
__u32 num_comp_vectors;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_device {
- __u64 response;
- __u64 driver_data[0];
+ __aligned_u64 response;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_device_resp {
- __u64 fw_ver;
+ __aligned_u64 fw_ver;
__be64 node_guid;
__be64 sys_image_guid;
- __u64 max_mr_size;
- __u64 page_size_cap;
+ __aligned_u64 max_mr_size;
+ __aligned_u64 page_size_cap;
__u32 vendor_id;
__u32 vendor_part_id;
__u32 hw_ver;
@@ -223,7 +229,7 @@
};
struct ib_uverbs_odp_caps {
- __u64 general_caps;
+ __aligned_u64 general_caps;
struct {
__u32 rc_odp_caps;
__u32 uc_odp_caps;
@@ -243,28 +249,47 @@
__u32 reserved;
};
+struct ib_uverbs_tm_caps {
+ /* Max size of rendezvous request message */
+ __u32 max_rndv_hdr_size;
+ /* Max number of entries in tag matching list */
+ __u32 max_num_tags;
+ /* TM flags */
+ __u32 flags;
+ /* Max number of outstanding list operations */
+ __u32 max_ops;
+ /* Max number of SGE in tag matching entry */
+ __u32 max_sge;
+ __u32 reserved;
+};
+
struct ib_uverbs_ex_query_device_resp {
struct ib_uverbs_query_device_resp base;
__u32 comp_mask;
__u32 response_length;
struct ib_uverbs_odp_caps odp_caps;
- __u64 timestamp_mask;
- __u64 hca_core_clock; /* in KHZ */
- __u64 device_cap_flags_ex;
+ __aligned_u64 timestamp_mask;
+ __aligned_u64 hca_core_clock; /* in KHZ */
+ __aligned_u64 device_cap_flags_ex;
struct ib_uverbs_rss_caps rss_caps;
__u32 max_wq_type_rq;
+ __u32 raw_packet_caps;
+ struct ib_uverbs_tm_caps tm_caps;
+ struct ib_uverbs_cq_moderation_caps cq_moderation_caps;
+ __aligned_u64 max_dm_size;
+ __u32 xrc_odp_caps;
__u32 reserved;
};
struct ib_uverbs_query_port {
- __u64 response;
+ __aligned_u64 response;
__u8 port_num;
__u8 reserved[7];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_port_resp {
- __u32 port_cap_flags;
+ __u32 port_cap_flags; /* see ib_uverbs_query_port_cap_flags */
__u32 max_msg_sz;
__u32 bad_pkey_cntr;
__u32 qkey_viol_cntr;
@@ -284,16 +309,18 @@
__u8 active_speed;
__u8 phys_state;
__u8 link_layer;
- __u8 reserved[2];
+ __u8 flags; /* see ib_uverbs_query_port_flags */
+ __u8 reserved;
};
struct ib_uverbs_alloc_pd {
- __u64 response;
- __u64 driver_data[0];
+ __aligned_u64 response;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_alloc_pd_resp {
__u32 pd_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_dealloc_pd {
@@ -301,14 +328,15 @@
};
struct ib_uverbs_open_xrcd {
- __u64 response;
+ __aligned_u64 response;
__u32 fd;
__u32 oflags;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_open_xrcd_resp {
__u32 xrcd_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_close_xrcd {
@@ -316,35 +344,38 @@
};
struct ib_uverbs_reg_mr {
- __u64 response;
- __u64 start;
- __u64 length;
- __u64 hca_va;
+ __aligned_u64 response;
+ __aligned_u64 start;
+ __aligned_u64 length;
+ __aligned_u64 hca_va;
__u32 pd_handle;
__u32 access_flags;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_reg_mr_resp {
__u32 mr_handle;
__u32 lkey;
__u32 rkey;
+ __u32 driver_data[0];
};
struct ib_uverbs_rereg_mr {
- __u64 response;
+ __aligned_u64 response;
__u32 mr_handle;
__u32 flags;
- __u64 start;
- __u64 length;
- __u64 hca_va;
+ __aligned_u64 start;
+ __aligned_u64 length;
+ __aligned_u64 hca_va;
__u32 pd_handle;
__u32 access_flags;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_rereg_mr_resp {
__u32 lkey;
__u32 rkey;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_dereg_mr {
@@ -352,15 +383,17 @@
};
struct ib_uverbs_alloc_mw {
- __u64 response;
+ __aligned_u64 response;
__u32 pd_handle;
__u8 mw_type;
__u8 reserved[3];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_alloc_mw_resp {
__u32 mw_handle;
__u32 rkey;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_dealloc_mw {
@@ -368,7 +401,7 @@
};
struct ib_uverbs_create_comp_channel {
- __u64 response;
+ __aligned_u64 response;
};
struct ib_uverbs_create_comp_channel_resp {
@@ -376,28 +409,34 @@
};
struct ib_uverbs_create_cq {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 cqe;
__u32 comp_vector;
__s32 comp_channel;
__u32 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
+};
+
+enum ib_uverbs_ex_create_cq_flags {
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
+ IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
};
struct ib_uverbs_ex_create_cq {
- __u64 user_handle;
+ __aligned_u64 user_handle;
__u32 cqe;
__u32 comp_vector;
__s32 comp_channel;
__u32 comp_mask;
- __u32 flags;
+ __u32 flags; /* bitmask of ib_uverbs_ex_create_cq_flags */
__u32 reserved;
};
struct ib_uverbs_create_cq_resp {
__u32 cq_handle;
__u32 cqe;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_ex_create_cq_resp {
@@ -407,32 +446,32 @@
};
struct ib_uverbs_resize_cq {
- __u64 response;
+ __aligned_u64 response;
__u32 cq_handle;
__u32 cqe;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_resize_cq_resp {
__u32 cqe;
__u32 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_poll_cq {
- __u64 response;
+ __aligned_u64 response;
__u32 cq_handle;
__u32 ne;
};
struct ib_uverbs_wc {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 status;
__u32 opcode;
__u32 vendor_err;
__u32 byte_len;
union {
- __u32 imm_data;
+ __be32 imm_data;
__u32 invalidate_rkey;
} ex;
__u32 qp_num;
@@ -458,7 +497,7 @@
};
struct ib_uverbs_destroy_cq {
- __u64 response;
+ __aligned_u64 response;
__u32 cq_handle;
__u32 reserved;
};
@@ -527,8 +566,8 @@
};
struct ib_uverbs_create_qp {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 send_cq_handle;
__u32 recv_cq_handle;
@@ -542,7 +581,7 @@
__u8 qp_type;
__u8 is_srq;
__u8 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
enum ib_uverbs_create_qp_mask {
@@ -553,8 +592,22 @@
IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
};
+enum {
+ /*
+ * This value is equal to IB_QP_DEST_QPN.
+ */
+ IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20,
+};
+
+enum {
+ /*
+ * This value is equal to IB_QP_RATE_LIMIT.
+ */
+ IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25,
+};
+
struct ib_uverbs_ex_create_qp {
- __u64 user_handle;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 send_cq_handle;
__u32 recv_cq_handle;
@@ -571,17 +624,17 @@
__u32 comp_mask;
__u32 create_flags;
__u32 rwq_ind_tbl_handle;
- __u32 reserved1;
+ __u32 source_qpn;
};
struct ib_uverbs_open_qp {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 qpn;
__u8 qp_type;
__u8 reserved[7];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
/* also used for open response */
@@ -594,6 +647,7 @@
__u32 max_recv_sge;
__u32 max_inline_data;
__u32 reserved;
+ __u32 driver_data[0];
};
struct ib_uverbs_ex_create_qp_resp {
@@ -622,10 +676,10 @@
};
struct ib_uverbs_query_qp {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 attr_mask;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_qp_resp {
@@ -659,7 +713,7 @@
__u8 alt_timeout;
__u8 sq_sig_all;
__u8 reserved[5];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_modify_qp {
@@ -689,14 +743,22 @@
__u8 alt_port_num;
__u8 alt_timeout;
__u8 reserved[2];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
+};
+
+struct ib_uverbs_ex_modify_qp {
+ struct ib_uverbs_modify_qp base;
+ __u32 rate_limit;
+ __u32 reserved;
};
-struct ib_uverbs_modify_qp_resp {
+struct ib_uverbs_ex_modify_qp_resp {
+ __u32 comp_mask;
+ __u32 response_length;
};
struct ib_uverbs_destroy_qp {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 reserved;
};
@@ -712,30 +774,48 @@
* document the ABI.
*/
struct ib_uverbs_sge {
- __u64 addr;
+ __aligned_u64 addr;
__u32 length;
__u32 lkey;
};
+enum ib_uverbs_wr_opcode {
+ IB_UVERBS_WR_RDMA_WRITE = 0,
+ IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1,
+ IB_UVERBS_WR_SEND = 2,
+ IB_UVERBS_WR_SEND_WITH_IMM = 3,
+ IB_UVERBS_WR_RDMA_READ = 4,
+ IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5,
+ IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6,
+ IB_UVERBS_WR_LOCAL_INV = 7,
+ IB_UVERBS_WR_BIND_MW = 8,
+ IB_UVERBS_WR_SEND_WITH_INV = 9,
+ IB_UVERBS_WR_TSO = 10,
+ IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
+ IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
+ IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+ /* Review enum ib_wr_opcode before modifying this */
+};
+
struct ib_uverbs_send_wr {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 num_sge;
- __u32 opcode;
+ __u32 opcode; /* see enum ib_uverbs_wr_opcode */
__u32 send_flags;
union {
- __u32 imm_data;
+ __be32 imm_data;
__u32 invalidate_rkey;
} ex;
union {
struct {
- __u64 remote_addr;
+ __aligned_u64 remote_addr;
__u32 rkey;
__u32 reserved;
} rdma;
struct {
- __u64 remote_addr;
- __u64 compare_add;
- __u64 swap;
+ __aligned_u64 remote_addr;
+ __aligned_u64 compare_add;
+ __aligned_u64 swap;
__u32 rkey;
__u32 reserved;
} atomic;
@@ -749,7 +829,7 @@
};
struct ib_uverbs_post_send {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 wr_count;
__u32 sge_count;
@@ -762,13 +842,13 @@
};
struct ib_uverbs_recv_wr {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 num_sge;
__u32 reserved;
};
struct ib_uverbs_post_recv {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 wr_count;
__u32 sge_count;
@@ -781,7 +861,7 @@
};
struct ib_uverbs_post_srq_recv {
- __u64 response;
+ __aligned_u64 response;
__u32 srq_handle;
__u32 wr_count;
__u32 sge_count;
@@ -794,15 +874,17 @@
};
struct ib_uverbs_create_ah {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 reserved;
struct ib_uverbs_ah_attr attr;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_create_ah_resp {
__u32 ah_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_destroy_ah {
@@ -814,7 +896,7 @@
__u32 qp_handle;
__u16 mlid;
__u16 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_detach_mcast {
@@ -822,7 +904,7 @@
__u32 qp_handle;
__u16 mlid;
__u16 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_flow_spec_hdr {
@@ -830,7 +912,7 @@
__u16 size;
__u16 reserved;
/* followed by flow_spec */
- __u64 flow_spec_data[0];
+ __aligned_u64 flow_spec_data[0];
};
struct ib_uverbs_flow_eth_filter {
@@ -916,6 +998,141 @@
struct ib_uverbs_flow_ipv6_filter mask;
};
+struct ib_uverbs_flow_spec_action_tag {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ __u32 tag_id;
+ __u32 reserved1;
+};
+
+struct ib_uverbs_flow_spec_action_drop {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+};
+
+struct ib_uverbs_flow_spec_action_handle {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ __u32 handle;
+ __u32 reserved1;
+};
+
+struct ib_uverbs_flow_spec_action_count {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ __u32 handle;
+ __u32 reserved1;
+};
+
+struct ib_uverbs_flow_tunnel_filter {
+ __be32 tunnel_id;
+};
+
+struct ib_uverbs_flow_spec_tunnel {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_tunnel_filter val;
+ struct ib_uverbs_flow_tunnel_filter mask;
+};
+
+struct ib_uverbs_flow_spec_esp_filter {
+ __u32 spi;
+ __u32 seq;
+};
+
+struct ib_uverbs_flow_spec_esp {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_spec_esp_filter val;
+ struct ib_uverbs_flow_spec_esp_filter mask;
+};
+
+struct ib_uverbs_flow_gre_filter {
+ /* c_ks_res0_ver field is bits 0-15 in offset 0 of a standard GRE header:
+ * bit 0 - C - checksum bit.
+ * bit 1 - reserved. set to 0.
+ * bit 2 - key bit.
+ * bit 3 - sequence number bit.
+ * bits 4:12 - reserved. set to 0.
+ * bits 13:15 - GRE version.
+ */
+ __be16 c_ks_res0_ver;
+ __be16 protocol;
+ __be32 key;
+};
+
+struct ib_uverbs_flow_spec_gre {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_gre_filter val;
+ struct ib_uverbs_flow_gre_filter mask;
+};
+
+struct ib_uverbs_flow_mpls_filter {
+ /* The field includes the entire MPLS label:
+ * bits 0:19 - label field.
+ * bits 20:22 - traffic class field.
+ * bits 23 - bottom of stack bit.
+ * bits 24:31 - ttl field.
+ */
+ __be32 label;
+};
+
+struct ib_uverbs_flow_spec_mpls {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_mpls_filter val;
+ struct ib_uverbs_flow_mpls_filter mask;
+};
+
struct ib_uverbs_flow_attr {
__u32 type;
__u16 size;
@@ -948,27 +1165,27 @@
};
struct ib_uverbs_create_srq {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 max_wr;
__u32 max_sge;
__u32 srq_limit;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_create_xsrq {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 srq_type;
__u32 pd_handle;
__u32 max_wr;
__u32 max_sge;
__u32 srq_limit;
- __u32 reserved;
+ __u32 max_num_tags;
__u32 xrcd_handle;
__u32 cq_handle;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_create_srq_resp {
@@ -976,6 +1193,7 @@
__u32 max_wr;
__u32 max_sge;
__u32 srqn;
+ __u32 driver_data[0];
};
struct ib_uverbs_modify_srq {
@@ -983,14 +1201,14 @@
__u32 attr_mask;
__u32 max_wr;
__u32 srq_limit;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_srq {
- __u64 response;
+ __aligned_u64 response;
__u32 srq_handle;
__u32 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_srq_resp {
@@ -1001,7 +1219,7 @@
};
struct ib_uverbs_destroy_srq {
- __u64 response;
+ __aligned_u64 response;
__u32 srq_handle;
__u32 reserved;
};
@@ -1013,11 +1231,13 @@
struct ib_uverbs_ex_create_wq {
__u32 comp_mask;
__u32 wq_type;
- __u64 user_handle;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 cq_handle;
__u32 max_wr;
__u32 max_sge;
+ __u32 create_flags; /* Use enum ib_wq_flags */
+ __u32 reserved;
};
struct ib_uverbs_ex_create_wq_resp {
@@ -1046,6 +1266,8 @@
__u32 wq_handle;
__u32 wq_state;
__u32 curr_wq_state;
+ __u32 flags; /* Use enum ib_wq_flags */
+ __u32 flags_mask; /* Use enum ib_wq_flags */
};
/* Prevent memory allocation rather than max expected size */
@@ -1072,4 +1294,18 @@
__u32 ind_tbl_handle;
};
+struct ib_uverbs_cq_moderation {
+ __u16 cq_count;
+ __u16 cq_period;
+};
+
+struct ib_uverbs_ex_modify_cq {
+ __u32 cq_handle;
+ __u32 attr_mask;
+ struct ib_uverbs_cq_moderation attr;
+ __u32 reserved;
+};
+
+#define IB_DEVICE_NAME_MAX 64
+
#endif /* IB_USER_VERBS_H */
diff --git a/sys/ofed/include/uapi/rdma/mlx5-abi.h b/sys/ofed/include/uapi/rdma/mlx5-abi.h
--- a/sys/ofed/include/uapi/rdma/mlx5-abi.h
+++ b/sys/ofed/include/uapi/rdma/mlx5-abi.h
@@ -281,4 +281,22 @@
__u32 comp_mask;
__u32 reserved;
};
+
+enum mlx5_ib_mmap_cmd {
+ MLX5_IB_MMAP_REGULAR_PAGE = 0,
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ MLX5_IB_MMAP_WC_PAGE = 2,
+ MLX5_IB_MMAP_NC_PAGE = 3,
+ /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+ MLX5_IB_MMAP_CORE_CLOCK = 5,
+ MLX5_IB_MMAP_ALLOC_WC = 6,
+ MLX5_IB_MMAP_CLOCK_INFO = 7,
+ MLX5_IB_MMAP_DEVICE_MEM = 8,
+};
+
+/* Bit indexes for the mlx5_alloc_ucontext_resp.clock_info_versions bitmap */
+enum {
+ MLX5_IB_CLOCK_INFO_V1 = 0,
+};
+
#endif /* MLX5_ABI_USER_H */
diff --git a/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_cmds.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_USER_IOCTL_CMDS_H
+#define MLX5_USER_IOCTL_CMDS_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+
+enum mlx5_ib_create_flow_action_attrs {
+ /* This attribute belong to the driver namespace */
+ MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_alloc_dm_attrs {
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+};
+
+enum mlx5_ib_devx_methods {
+ MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_DEVX_QUERY_UAR,
+ MLX5_IB_METHOD_DEVX_QUERY_EQN,
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+};
+
+enum mlx5_ib_devx_other_attrs {
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_create_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+};
+
+enum mlx5_ib_devx_query_uar_attrs {
+ MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+};
+
+enum mlx5_ib_devx_obj_destroy_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_obj_modify_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_async_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
+};
+
+enum mlx5_ib_devx_subscribe_event_attrs {
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+};
+
+enum mlx5_ib_devx_query_eqn_attrs {
+ MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+};
+
+enum mlx5_ib_devx_obj_methods {
+ MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+ MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+ MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+ MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
+};
+
+enum mlx5_ib_var_alloc_attrs {
+ MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+ MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+ MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+};
+
+enum mlx5_ib_var_obj_destroy_attrs {
+ MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_var_obj_methods {
+ MLX5_IB_METHOD_VAR_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_VAR_OBJ_DESTROY,
+};
+
+enum mlx5_ib_uar_alloc_attrs {
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+};
+
+enum mlx5_ib_uar_obj_destroy_attrs {
+ MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_uar_obj_methods {
+ MLX5_IB_METHOD_UAR_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+};
+
+enum mlx5_ib_devx_umem_reg_attrs {
+ MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+};
+
+enum mlx5_ib_devx_umem_dereg_attrs {
+ MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_pp_obj_methods {
+ MLX5_IB_METHOD_PP_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_PP_OBJ_DESTROY,
+};
+
+enum mlx5_ib_pp_alloc_attrs {
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+};
+
+enum mlx5_ib_pp_obj_destroy_attrs {
+ MLX5_IB_ATTR_PP_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_umem_methods {
+ MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+};
+
+enum mlx5_ib_devx_async_cmd_fd_alloc_attrs {
+ MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_async_event_fd_alloc_attrs {
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+};
+
+enum mlx5_ib_devx_async_cmd_fd_methods {
+ MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_async_event_fd_methods {
+ MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_objects {
+ MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ MLX5_IB_OBJECT_VAR,
+ MLX5_IB_OBJECT_PP,
+ MLX5_IB_OBJECT_UAR,
+};
+
+enum mlx5_ib_flow_matcher_create_attrs {
+ MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+};
+
+enum mlx5_ib_flow_matcher_destroy_attrs {
+ MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_matcher_methods {
+ MLX5_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+};
+
+#define MLX5_IB_DW_MATCH_PARAM 0x80
+
+struct mlx5_ib_match_params {
+ __u32 match_params[MLX5_IB_DW_MATCH_PARAM];
+};
+
+enum mlx5_ib_flow_type {
+ MLX5_IB_FLOW_TYPE_NORMAL,
+ MLX5_IB_FLOW_TYPE_SNIFFER,
+ MLX5_IB_FLOW_TYPE_ALL_DEFAULT,
+ MLX5_IB_FLOW_TYPE_MC_DEFAULT,
+};
+
+enum mlx5_ib_create_flow_attrs {
+ MLX5_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+};
+
+enum mlx5_ib_destoy_flow_attrs {
+ MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_methods {
+ MLX5_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_DESTROY_FLOW,
+};
+
+enum mlx5_ib_flow_action_methods {
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+};
+
+enum mlx5_ib_create_flow_action_create_modify_header_attrs {
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+};
+
+enum mlx5_ib_create_flow_action_create_packet_reformat_attrs {
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+};
+
+#endif
diff --git a/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_verbs.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_USER_IOCTL_VERBS_H
+#define MLX5_USER_IOCTL_VERBS_H
+
+#include <linux/types.h>
+
+enum mlx5_ib_uapi_flow_action_flags {
+ MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0,
+};
+
+enum mlx5_ib_uapi_flow_table_type {
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX = 0x3,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX = 0x4,
+};
+
+enum mlx5_ib_uapi_flow_action_packet_reformat_type {
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0,
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1,
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2,
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
+};
+
+struct mlx5_ib_uapi_devx_async_cmd_hdr {
+ __aligned_u64 wr_id;
+ __u8 out_data[];
+};
+
+enum mlx5_ib_uapi_dm_type {
+ MLX5_IB_UAPI_DM_TYPE_MEMIC,
+ MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
+ MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
+};
+
+enum mlx5_ib_uapi_devx_create_event_channel_flags {
+ MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA = 1 << 0,
+};
+
+struct mlx5_ib_uapi_devx_async_event_hdr {
+ __aligned_u64 cookie;
+ __u8 out_data[];
+};
+
+enum mlx5_ib_uapi_pp_alloc_flags {
+ MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX = 1 << 0,
+};
+
+enum mlx5_ib_uapi_uar_alloc_type {
+ MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF = 0x0,
+ MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC = 0x1,
+};
+
+#endif
+
diff --git a/sys/ofed/include/uapi/rdma/rdma_user_ioctl.h b/sys/ofed/include/uapi/rdma/rdma_user_ioctl.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/rdma_user_ioctl.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2016 Mellanox Technologies, LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_USER_IOCTL_H
+#define RDMA_USER_IOCTL_H
+
+#ifdef _KERNEL
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#else
+#include <infiniband/types.h>
+#include <sys/ioccom.h>
+#endif
+
+#include <rdma/ib_user_mad.h>
+#include <rdma/rdma_user_ioctl_cmds.h>
+
+/* Legacy name, for user space application which already use it */
+#define IB_IOCTL_MAGIC RDMA_IOCTL_MAGIC
+
+/*
+ * General blocks assignments
+ * It is closed on purpose do not expose it it user space
+ * #define MAD_CMD_BASE 0x00
+ */
+
+/* MAD specific section */
+#define IB_USER_MAD_REGISTER_AGENT _IOWR(RDMA_IOCTL_MAGIC, 0x01, struct ib_user_mad_reg_req)
+#define IB_USER_MAD_UNREGISTER_AGENT _IOW(RDMA_IOCTL_MAGIC, 0x02, __u32)
+#define IB_USER_MAD_ENABLE_PKEY _IO(RDMA_IOCTL_MAGIC, 0x03)
+#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(RDMA_IOCTL_MAGIC, 0x04, struct ib_user_mad_reg_req2)
+
+#endif /* RDMA_USER_IOCTL_H */
diff --git a/sys/ofed/include/uapi/rdma/rdma_user_ioctl_cmds.h b/sys/ofed/include/uapi/rdma/rdma_user_ioctl_cmds.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_USER_IOCTL_CMDS_H
+#define RDMA_USER_IOCTL_CMDS_H
+
+#ifdef _KERNEL
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#else
+#include <infiniband/types.h>
+#include <sys/ioccom.h>
+#endif
+
+/* Documentation/ioctl/ioctl-number.rst */
+#define RDMA_IOCTL_MAGIC 0x1b
+#define RDMA_VERBS_IOCTL \
+ _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr)
+
+enum {
+ /* User input */
+ UVERBS_ATTR_F_MANDATORY = 1U << 0,
+ /*
+ * Valid output bit should be ignored and considered set in
+ * mandatory fields. This bit is kernel output.
+ */
+ UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1,
+};
+
+struct ib_uverbs_attr {
+ __u16 attr_id; /* command specific type attribute */
+ __u16 len; /* only for pointers and IDRs array */
+ __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */
+ union {
+ struct {
+ __u8 elem_id;
+ __u8 reserved;
+ } enum_data;
+ __u16 reserved;
+ } attr_data;
+ union {
+ /*
+ * ptr to command, inline data, idr/fd or
+ * ptr to __u32 array of IDRs
+ */
+ __aligned_u64 data;
+ /* Used by FD_IN and FD_OUT */
+ __s64 data_s64;
+ };
+};
+
+struct ib_uverbs_ioctl_hdr {
+ __u16 length;
+ __u16 object_id;
+ __u16 method_id;
+ __u16 num_attrs;
+ __aligned_u64 reserved1;
+ __u32 driver_id;
+ __u32 reserved2;
+ struct ib_uverbs_attr attrs[0];
+};
+
+#endif

File Metadata

Mime Type
text/plain
Expires
Fri, Jan 10, 6:45 AM (12 h, 15 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15740314
Default Alt Text
D31149.diff (835 KB)

Event Timeline