diff --git a/Makefile.am b/Makefile.am
index a66dfda..27fce57 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,13 +20,13 @@ EXTRA_DIST = autogen.sh
 include_HEADERS = include/gdsync.h
 libgdsyncincludedir = $(includedir)/gdsync
-libgdsyncinclude_HEADERS = include/gdsync/core.h include/gdsync/device.cuh include/gdsync/mlx5.h include/gdsync/tools.h
+libgdsyncinclude_HEADERS = include/gdsync/core.h include/gdsync/device.cuh include/gdsync/mlx5.h include/gdsync/tools.h
 
 src_libgdsync_la_CFLAGS = $(AM_CFLAGS)
-src_libgdsync_la_SOURCES = src/gdsync.cpp src/memmgr.cpp src/mem.cpp src/objs.cpp src/apis.cpp src/mlx5.cpp include/gdsync.h
+src_libgdsync_la_SOURCES = src/gdsync.cpp src/memmgr.cpp src/mem.cpp src/objs.cpp src/apis.cpp src/mlx5.cpp src/mlx5-exp.cpp include/gdsync.h
 src_libgdsync_la_LDFLAGS = -version-info @VERSION_INFO@
 
-noinst_HEADERS = src/mem.hpp src/memmgr.hpp src/objs.hpp src/rangeset.hpp src/utils.hpp src/archutils.h src/mlnxutils.h
+noinst_HEADERS = src/mem.hpp src/memmgr.hpp src/objs.hpp src/rangeset.hpp src/utils.hpp src/archutils.h src/mlnxutils.h src/mlx5-exp.hpp
 
 # if enabled at configure time
@@ -36,7 +36,7 @@ bin_PROGRAMS = tests/gds_kernel_latency tests/gds_poll_lat tests/gds_kernel_loop
 noinst_PROGRAMS = tests/rstest tests/wqtest
 
 tests_gds_kernel_latency_SOURCES = tests/gds_kernel_latency.c tests/gpu_kernels.cu tests/pingpong.c tests/gpu.cpp
-tests_gds_kernel_latency_LDADD = $(top_builddir)/src/libgdsync.la -lmpi $(LIBGDSTOOLS) -lgdrapi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
+tests_gds_kernel_latency_LDADD = $(top_builddir)/src/libgdsync.la $(MPILDFLAGS) $(LIBGDSTOOLS) -lgdrapi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
 
 tests_rstest_SOURCES = tests/rstest.cpp
 tests_rstest_LDADD =
@@ -45,10 +45,10 @@ tests_wqtest_SOURCES = tests/task_queue_test.cpp
 tests_wqtest_LDADD = $(PTHREAD_LIBS)
 
 tests_gds_poll_lat_SOURCES = tests/gds_poll_lat.c tests/gpu.cpp tests/gpu_kernels.cu
-tests_gds_poll_lat_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi -lmpi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
+tests_gds_poll_lat_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi $(MPILDFLAGS) $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
 
 tests_gds_sanity_SOURCES = tests/gds_sanity.cpp tests/gpu.cpp tests/gpu_kernels.cu
-tests_gds_sanity_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi -lmpi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
+tests_gds_sanity_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi $(MPILDFLAGS) $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
 
 tests_gds_kernel_loopback_latency_SOURCES = tests/gds_kernel_loopback_latency.c tests/pingpong.c tests/gpu.cpp tests/gpu_kernels.cu
 tests_gds_kernel_loopback_latency_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
 
diff --git a/configure.ac b/configure.ac
index a79aed6..e20f313 100644
--- a/configure.ac
+++ b/configure.ac
@@ -93,25 +93,54 @@ else
     AC_SUBST(LIBGDSTOOLS)
 fi
 
-AC_ARG_WITH([mpi],
-    AC_HELP_STRING([--with-mpi], [ Set path to mpi installation ]))
-if test x$with_mpi = x || test x$with_mpi = xno; then
+AC_ARG_WITH([spectrum-mpi],
+    AC_HELP_STRING([--with-spectrum-mpi], [ Set path to Spectrum MPI installation ]))
+if test x$with_spectrum_mpi = x || test x$with_spectrum_mpi = xno; then
     # assuming system location
     mpi_home=/usr
-    MPICC=$with_home/bin/mpicc
-    MPICXX=$with_home/bin/mpic++
+    MPICC=${mpi_home}/bin/mpicc
+    MPICXX=${mpi_home}/bin/mpic++
+    MPILDFLAGS="-lmpi_ibm"
 else
-    if test -d $with_mpi; then
-        mpi_home=$with_mpi
+    if test -d $with_spectrum_mpi; then
+        mpi_home=$with_spectrum_mpi
         MPICC=${mpi_home}/bin/mpicc
         MPICXX=${mpi_home}/bin/mpic++
         CPPFLAGS="$CPPFLAGS -I${mpi_home}/include"
         LDFLAGS="$LDFLAGS -L${mpi_home}/lib -L${mpi_home}/lib64"
+        MPILDFLAGS="-lmpi_ibm"
     else
         echo "MPI dir does not exist"
     fi
 fi
 
+AC_ARG_WITH([mpi],
+    AC_HELP_STRING([--with-mpi], [ Set path to MPI installation ]))
+if test x$with_spectrum_mpi = x || test x$with_spectrum_mpi = xno; then
+    if test x$with_mpi = x || test x$with_mpi = xno; then
+        # assuming system location
+        mpi_home=/usr
+        MPICC=${mpi_home}/bin/mpicc
+        MPICXX=${mpi_home}/bin/mpic++
+        MPILDFLAGS="-lmpi"
+    else
+        if test -d $with_mpi; then
+            mpi_home=$with_mpi
+            MPICC=${mpi_home}/bin/mpicc
+            MPICXX=${mpi_home}/bin/mpic++
+            CPPFLAGS="$CPPFLAGS -I${mpi_home}/include"
+            LDFLAGS="$LDFLAGS -L${mpi_home}/lib -L${mpi_home}/lib64"
+            MPILDFLAGS="-lmpi"
+        else
+            echo "MPI dir does not exist"
+        fi
+    fi
+fi
+
+if test x$with_spectrum_mpi != x && test x$with_spectrum_mpi != xno && test x$with_mpi != x && test x$with_mpi != xno; then
+    AC_MSG_ERROR([--with-mpi and --with-spectrum-mpi are mutually exclusive.])
+fi
+
 dnl Specify CUDA Location
 AC_ARG_WITH(cuda-toolkit,
     AC_HELP_STRING([--with-cuda-toolkit=CUDATKDIR], [ Specify CUDA toolkit installation directory (default: /usr/local/cuda)]),
@@ -186,6 +215,7 @@ AC_MSG_NOTICE([Setting MPI_PATH = ${mpi_home} ])
 AC_SUBST( MPI_PATH, [${mpi_home} ])
 AC_SUBST( MPICC, [${MPICC} ])
 AC_SUBST( MPICXX, [${MPICXX} ])
+AC_SUBST( MPILDFLAGS, [${MPILDFLAGS} ])
 
 CPPFLAGS="$CPPFLAGS -I$CUDA_DRV_PATH/include -I$CUDA_PATH/include"
 LDFLAGS="$LDFLAGS -L$CUDA_DRV_PATH/lib64 -L$CUDA_DRV_PATH/lib -L$CUDA_PATH/lib64 -L$CUDA_PATH/lib"
diff --git a/include/gdsync/core.h b/include/gdsync/core.h
index 7ff0cbb..a74299d 100644
--- a/include/gdsync/core.h
+++ b/include/gdsync/core.h
@@ -40,35 +40,43 @@
          ((((v) & 0x0000ffffU) >> 0 ) >= (unsigned)GDS_API_MINOR_VERSION) )
 
 typedef enum gds_param {
-    GDS_PARAM_VERSION,
-    GDS_NUM_PARAMS
+        GDS_PARAM_VERSION,
+        GDS_NUM_PARAMS
 } gds_param_t;
 
 int gds_query_param(gds_param_t param, int *value);
 
 enum gds_create_qp_flags {
-    GDS_CREATE_QP_DEFAULT         = 0,
-    GDS_CREATE_QP_WQ_ON_GPU       = 1<<0,
-    GDS_CREATE_QP_TX_CQ_ON_GPU    = 1<<1,
-    GDS_CREATE_QP_RX_CQ_ON_GPU    = 1<<2,
-    GDS_CREATE_QP_WQ_DBREC_ON_GPU = 1<<5,
+        GDS_CREATE_QP_DEFAULT         = 0,
+        GDS_CREATE_QP_WQ_ON_GPU       = 1<<0,
+        GDS_CREATE_QP_TX_CQ_ON_GPU    = 1<<1,
+        GDS_CREATE_QP_RX_CQ_ON_GPU    = 1<<2,
+        GDS_CREATE_QP_WQ_DBREC_ON_GPU = 1<<5,
 };
 
-typedef struct ibv_exp_qp_init_attr gds_qp_init_attr_t;
-typedef struct ibv_exp_send_wr gds_send_wr;
+typedef struct ibv_qp_init_attr gds_qp_init_attr_t;
+typedef struct ibv_send_wr gds_send_wr;
 
-struct gds_cq {
+typedef enum gds_driver_type {
+        GDS_DRIVER_TYPE_UNSUPPORTED = 0,
+        GDS_DRIVER_TYPE_MLX5_EXP,
+        GDS_DRIVER_TYPE_MLX5_DV,
+        GDS_DRIVER_TYPE_MLX5_DEVX
+} gds_driver_type_t;
+
+typedef struct gds_cq {
         struct ibv_cq *cq;
         uint32_t curr_offset;
-};
+        gds_driver_type_t dtype;
+} gds_cq_t;
 
-struct gds_qp {
+typedef struct gds_qp {
         struct ibv_qp *qp;
-        struct gds_cq send_cq;
-        struct gds_cq recv_cq;
-        struct ibv_exp_res_domain * res_domain;
+        struct gds_cq *send_cq;
+        struct gds_cq *recv_cq;
         struct ibv_context *dev_context;
-};
+        gds_driver_type_t dtype;
+} gds_qp_t;
 
 /* \brief: Create a peer-enabled QP attached to the specified GPU id.
  *
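The gds_driver_type_t tag added above is what lets the now-opaque gds_cq_t/gds_qp_t handles be embedded inside driver-private wrappers and recovered later by the to_gds_mexp_cq()/to_gds_mexp_qp() helpers introduced further down. A minimal, self-contained sketch of that embedding pattern; the container_of definition and every name here are local stand-ins for illustration (libgdsync picks up its own container_of through utils.hpp):

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Stand-in for the container_of used by src/mlx5-exp.hpp. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    typedef enum { DRV_MLX5_EXP = 1 } drv_t;

    typedef struct base_cq {
            drv_t dtype;              /* discriminates the containing wrapper */
    } base_cq_t;

    typedef struct exp_cq {
            base_cq_t gcq;            /* embedded generic handle */
            void *res_domain;         /* driver-private state */
    } exp_cq_t;

    /* Same shape as to_gds_mexp_cq(): verify the tag, recover the wrapper. */
    static exp_cq_t *to_exp_cq(base_cq_t *gcq)
    {
            assert(gcq->dtype == DRV_MLX5_EXP);
            return container_of(gcq, exp_cq_t, gcq);
    }

    int main(void)
    {
            exp_cq_t cq = { .gcq = { DRV_MLX5_EXP }, .res_domain = (void *)0x1 };
            base_cq_t *handle = &cq.gcq;      /* what API callers hold */
            printf("%p\n", to_exp_cq(handle)->res_domain);
            return 0;
    }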
diff --git a/src/apis.cpp b/src/apis.cpp
index cd532d7..0801771 100644
--- a/src/apis.cpp
+++ b/src/apis.cpp
@@ -51,6 +51,7 @@
 #include "utils.hpp"
 #include "archutils.h"
 #include "mlnxutils.h"
+#include "mlx5-exp.hpp"
 
 //-----------------------------------------------------------------------------
 
@@ -171,33 +172,24 @@ int gds_post_recv(struct gds_qp *qp, struct ibv_recv_wr *wr, struct ibv_recv_wr
 
 //-----------------------------------------------------------------------------
 
-int gds_prepare_send(struct gds_qp *qp, gds_send_wr *p_ewr,
+int gds_prepare_send(struct gds_qp *gqp, gds_send_wr *p_ewr,
                      gds_send_wr **bad_ewr,
                      gds_send_request_t *request)
 {
         int ret = 0;
+        gds_mlx5_exp_qp_t *gmexpqp;
+
         gds_init_send_info(request);
-        assert(qp);
-        assert(qp->qp);
-        ret = ibv_exp_post_send(qp->qp, p_ewr, bad_ewr);
-        if (ret) {
+        assert(gqp);
+        assert(gqp->qp);
+        assert(gqp->dtype == GDS_DRIVER_TYPE_MLX5_EXP);
+
+        gmexpqp = to_gds_mexp_qp(gqp);
+
+        ret = gds_mlx5_exp_prepare_send(gmexpqp, p_ewr, bad_ewr, request);
+        if (ret)
+                gds_err("Error %d in gds_mlx5_exp_prepare_send.\n", ret);
 
-                if (ret == ENOMEM) {
-                        // out of space error can happen too often to report
-                        gds_dbg("ENOMEM error %d in ibv_exp_post_send\n", ret);
-                } else {
-                        gds_err("error %d in ibv_exp_post_send\n", ret);
-                }
-                goto out;
-        }
-
-        ret = ibv_exp_peer_commit_qp(qp->qp, &request->commit);
-        if (ret) {
-                gds_err("error %d in ibv_exp_peer_commit_qp\n", ret);
-                //gds_wait_kernel();
-                goto out;
-        }
-out:
         return ret;
 }
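Since gds_send_wr is now an alias for plain struct ibv_send_wr, callers of the refactored gds_prepare_send() build standard verbs work requests with no exp_ fields and no comp_mask. A hedged caller-side sketch, modeled on the test-program changes further below; the helper name and the UD parameters are illustrative and error handling is trimmed:

    #include <string.h>
    #include <stdint.h>
    #include <infiniband/verbs.h>
    #include <gdsync.h>

    /* Stage a signaled UD send on a peer-enabled QP; the resulting
     * gds_send_request_t can later be queued on a CUDA stream. */
    static int stage_ud_send(struct gds_qp *qp, struct ibv_ah *ah,
                             uint32_t dqpn, uint32_t lkey,
                             void *buf, uint32_t len,
                             gds_send_request_t *req)
    {
            struct ibv_sge sge = {
                    .addr   = (uintptr_t)buf,
                    .length = len,
                    .lkey   = lkey
            };
            gds_send_wr wr;          /* == struct ibv_send_wr after this patch */
            gds_send_wr *bad_wr;

            memset(&wr, 0, sizeof(wr));      /* note: no .comp_mask anymore */
            wr.sg_list    = &sge;
            wr.num_sge    = 1;
            wr.opcode     = IBV_WR_SEND;
            wr.send_flags = IBV_SEND_SIGNALED;
            wr.wr.ud.ah          = ah;
            wr.wr.ud.remote_qpn  = dqpn;
            wr.wr.ud.remote_qkey = 0x11111111;

            /* Dispatches to gds_mlx5_exp_prepare_send() on exp-verbs devices. */
            return gds_prepare_send(qp, &wr, &bad_wr, req);
    }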
diff --git a/src/gdsync.cpp b/src/gdsync.cpp
index 90d5508..23907fe 100644
--- a/src/gdsync.cpp
+++ b/src/gdsync.cpp
@@ -43,6 +43,7 @@
 #include "archutils.h"
 #include "mlnxutils.h"
 #include "task_queue.hpp"
+#include "mlx5-exp.hpp"
 
 //-----------------------------------------------------------------------------
 
@@ -1678,143 +1679,20 @@ gds_peer *peer_from_stream(CUstream stream)
 
 //-----------------------------------------------------------------------------
 
-static ibv_exp_res_domain *gds_create_res_domain(struct ibv_context *context)
-{
-        if (!context) {
-                gds_err("invalid context");
-                return NULL;
-        }
-
-        ibv_exp_res_domain_init_attr res_domain_attr;
-        memset(&res_domain_attr, 0, sizeof(res_domain_attr));
-
-        res_domain_attr.comp_mask |= IBV_EXP_RES_DOMAIN_THREAD_MODEL;
-        res_domain_attr.thread_model = IBV_EXP_THREAD_SINGLE;
-
-        ibv_exp_res_domain *res_domain = ibv_exp_create_res_domain(context, &res_domain_attr);
-        if (!res_domain) {
-                gds_warn("Can't create resource domain\n");
-        }
-
-        return res_domain;
-}
-
-//-----------------------------------------------------------------------------
-
-static struct gds_cq *
-gds_create_cq_internal(struct ibv_context *context, int cqe,
-                       void *cq_context, struct ibv_comp_channel *channel,
-                       int comp_vector, int gpu_id, gds_alloc_cq_flags_t flags,
-                       struct ibv_exp_res_domain * res_domain)
-{
-        struct gds_cq *gcq = NULL;
-        ibv_exp_cq_init_attr attr;
-        gds_peer *peer = NULL;
-        gds_peer_attr *peer_attr = NULL;
-        int ret=0;
-
-        if(!context)
-        {
-                gds_dbg("Invalid input context\n");
-                return NULL;
-        }
-
-        gcq = (struct gds_cq*)calloc(1, sizeof(struct gds_cq));
-        if (!gcq) {
-                gds_err("cannot allocate memory\n");
-                return NULL;
-        }
-
-        //Here we need to recover peer and peer_attr pointers to set alloc_type and alloc_flags
-        //before ibv_exp_create_cq
-        ret = gds_register_peer_by_ordinal(gpu_id, &peer, &peer_attr);
-        if (ret) {
-                gds_err("error %d while registering GPU peer\n", ret);
-                return NULL;
-        }
-        assert(peer);
-        assert(peer_attr);
-
-        peer->alloc_type = gds_peer::CQ;
-        peer->alloc_flags = flags;
-
-        attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_PEER_DIRECT;
-        attr.flags = 0; // see ibv_exp_cq_create_flags
-        attr.peer_direct_attrs = peer_attr;
-        if (res_domain) {
-                gds_dbg("using peer->res_domain %p for CQ\n", res_domain);
-                attr.res_domain = res_domain;
-                attr.comp_mask |= IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN;
-        }
-
-        int old_errno = errno;
-        gcq->cq = ibv_exp_create_cq(context, cqe, cq_context, channel, comp_vector, &attr);
-        if (!gcq->cq) {
-                gds_err("error %d in ibv_exp_create_cq, old errno %d\n", errno, old_errno);
-                return NULL;
-        }
-
-        return gcq;
-}
-
-//Note: general create cq function, not really used for now!
-struct gds_cq *
-gds_create_cq(struct ibv_context *context, int cqe,
-              void *cq_context, struct ibv_comp_channel *channel,
-              int comp_vector, int gpu_id, gds_alloc_cq_flags_t flags)
-{
-        int ret = 0;
-        struct gds_cq *gcq = NULL;
-        //TODO: leak of res_domain
-        struct ibv_exp_res_domain * res_domain;
-        gds_dbg("cqe=%d gpu_id=%d cq_flags=%08x\n", cqe, gpu_id, flags);
-
-        gds_peer *peer = NULL;
-        gds_peer_attr *peer_attr = NULL;
-        ret = gds_register_peer_by_ordinal(gpu_id, &peer, &peer_attr);
-        if (ret) {
-                gds_err("error %d while registering GPU peer\n", ret);
-                return NULL;
-        }
-        assert(peer);
-        assert(peer_attr);
-
-        peer->alloc_type = gds_peer::CQ;
-        peer->alloc_flags = flags;
-
-        res_domain = gds_create_res_domain(context);
-        if (res_domain)
-                gds_dbg("using res_domain %p\n", res_domain);
-        else
-                gds_warn("NOT using res_domain\n");
-
-
-        gcq = gds_create_cq_internal(context, cqe, cq_context, channel, comp_vector, gpu_id, flags, res_domain);
-
-        if (!gcq) {
-                gds_err("error in gds_create_cq_internal\n");
-                return NULL;
-        }
-
-        return gcq;
-}
-
-//-----------------------------------------------------------------------------
-
 struct gds_qp *gds_create_qp(struct ibv_pd *pd, struct ibv_context *context,
                              gds_qp_init_attr_t *qp_attr, int gpu_id, int flags)
 {
         int ret = 0;
-        struct gds_qp *gqp = NULL;
-        struct ibv_qp *qp = NULL;
-        struct gds_cq *rx_gcq = NULL, *tx_gcq = NULL;
+        gds_mlx5_exp_qp_t *gmexpqp = NULL;
         gds_peer *peer = NULL;
         gds_peer_attr *peer_attr = NULL;
+        gds_driver_type dtype;
         int old_errno = errno;
 
         gds_dbg("pd=%p context=%p gpu_id=%d flags=%08x current errno=%d\n", pd, context, gpu_id, flags, errno);
         assert(pd);
         assert(context);
+        assert(context->device);
         assert(qp_attr);
 
         if (flags & ~(GDS_CREATE_QP_WQ_ON_GPU|GDS_CREATE_QP_TX_CQ_ON_GPU|GDS_CREATE_QP_RX_CQ_ON_GPU|GDS_CREATE_QP_WQ_DBREC_ON_GPU)) {
@@ -1822,136 +1700,70 @@ struct gds_qp *gds_create_qp(struct ibv_pd *pd, struct ibv_context *context,
                 return NULL;
         }
 
-        gqp = (struct gds_qp*)calloc(1, sizeof(struct gds_qp));
-        if (!gqp) {
-                gds_err("cannot allocate memory\n");
-                return NULL;
-        }
-
-        gqp->dev_context=context;
-
-        // peer registration
         gds_dbg("before gds_register_peer_ex\n");
         ret = gds_register_peer_by_ordinal(gpu_id, &peer, &peer_attr);
         if (ret) {
-                gds_err("error %d in gds_register_peer_ex\n", ret);
-                goto err;
-        }
-
-        gqp->res_domain = gds_create_res_domain(context);
-        if (gqp->res_domain)
-                gds_dbg("using gqp->res_domain %p\n", gqp->res_domain);
-        else
-                gds_warn("NOT using gqp->res_domain\n");
-
-        tx_gcq = gds_create_cq_internal(context, qp_attr->cap.max_send_wr, NULL, NULL, 0, gpu_id,
-                                        (flags & GDS_CREATE_QP_TX_CQ_ON_GPU) ? GDS_ALLOC_CQ_ON_GPU : GDS_ALLOC_CQ_DEFAULT,
-                                        gqp->res_domain);
-        if (!tx_gcq) {
-                ret = errno;
-                gds_err("error %d while creating TX CQ, old_errno=%d\n", ret, old_errno);
+                gds_err("error %d in gds_register_peer_ex\n", ret);
                 goto err;
         }
 
-        rx_gcq = gds_create_cq_internal(context, qp_attr->cap.max_recv_wr, NULL, NULL, 0, gpu_id,
-                                        (flags & GDS_CREATE_QP_RX_CQ_ON_GPU) ? GDS_ALLOC_CQ_ON_GPU : GDS_ALLOC_CQ_DEFAULT,
-                                        gqp->res_domain);
-        if (!rx_gcq) {
-                ret = errno;
-                gds_err("error %d while creating RX CQ\n", ret);
+        dtype = gds_get_driver_type(context->device);
+        if (dtype != GDS_DRIVER_TYPE_MLX5_EXP) {
+                gds_err("Unsupported IB device\n");
                 goto err;
         }
 
-        // peer registration
-        qp_attr->send_cq = tx_gcq->cq;
-        qp_attr->recv_cq = rx_gcq->cq;
-        qp_attr->pd = pd;
-        qp_attr->comp_mask |= IBV_EXP_QP_INIT_ATTR_PD;
-
-        peer->alloc_type = gds_peer::WQ;
-        peer->alloc_flags = GDS_ALLOC_WQ_DEFAULT | GDS_ALLOC_DBREC_DEFAULT;
-        if (flags & GDS_CREATE_QP_WQ_ON_GPU) {
-                gds_err("error, QP WQ on GPU is not supported yet\n");
-                goto err;
-        }
-        if (flags & GDS_CREATE_QP_WQ_DBREC_ON_GPU) {
-                gds_warn("QP WQ DBREC on GPU\n");
-                peer->alloc_flags |= GDS_ALLOC_DBREC_ON_GPU;
-        }
-        qp_attr->comp_mask |= IBV_EXP_QP_INIT_ATTR_PEER_DIRECT;
-        qp_attr->peer_direct_attrs = peer_attr;
-
-        qp = ibv_exp_create_qp(context, qp_attr);
-        if (!qp) {
-                ret = EINVAL;
-                gds_err("error in ibv_exp_create_qp\n");
+        gmexpqp = gds_mlx5_exp_create_qp(pd, context, qp_attr, peer, peer_attr, flags);
+        if (!gmexpqp) {
+                gds_err("Error in gds_mlx5_exp_create_qp.\n");
                 goto err;
         }
 
-        gqp->qp = qp;
-        gqp->send_cq.cq = qp->send_cq;
-        gqp->send_cq.curr_offset = 0;
-        gqp->recv_cq.cq = qp->recv_cq;
-        gqp->recv_cq.curr_offset = 0;
+        gds_dbg("created gds_qp=%p\n", &gmexpqp->gqp);
 
-        gds_dbg("created gds_qp=%p\n", gqp);
-
-        return gqp;
+        return &gmexpqp->gqp;
 
 err:
-        gds_dbg("destroying QP\n");
-        gds_destroy_qp(gqp);
-
         return NULL;
 }
 
 //-----------------------------------------------------------------------------
 
-int gds_destroy_qp(struct gds_qp *gqp)
+int gds_destroy_cq(struct gds_cq *gcq)
 {
         int retcode = 0;
         int ret;
 
-        if(!gqp) return retcode;
+        if (!gcq)
+                return retcode;
 
-        if(gqp->qp)
-        {
-                ret = ibv_destroy_qp(gqp->qp);
-                if (ret) {
-                        gds_err("error %d in destroy_qp\n", ret);
-                        retcode = ret;
-                }
-        }
+        // Currently, we support only exp-verbs.
+        assert(gcq->dtype == GDS_DRIVER_TYPE_MLX5_EXP);
 
-        if(gqp->send_cq.cq)
-        {
-                ret = ibv_destroy_cq(gqp->send_cq.cq);
-                if (ret) {
-                        gds_err("error %d in destroy_cq send_cq\n", ret);
-                        retcode = ret;
-                }
-        }
+        gds_mlx5_exp_cq_t *gmexpcq = to_gds_mexp_cq(gcq);
 
-        if(gqp->recv_cq.cq)
-        {
-                ret = ibv_destroy_cq(gqp->recv_cq.cq);
-                if (ret) {
-                        gds_err("error %d in destroy_cq recv_cq\n", ret);
-                        retcode = ret;
-                }
-        }
+        retcode = gds_mlx5_exp_destroy_cq(gmexpcq);
 
-        if(gqp->res_domain) {
-                struct ibv_exp_destroy_res_domain_attr attr; //IBV_EXP_DESTROY_RES_DOMAIN_RESERVED
-                attr.comp_mask=0;
-                ret = ibv_exp_destroy_res_domain(gqp->dev_context, gqp->res_domain, &attr);
-                if (ret) {
-                        gds_err("ibv_exp_destroy_res_domain error %d: %s\n", ret, strerror(ret));
-                        retcode = ret;
-                }
-        }
+        return retcode;
+}
+
+//-----------------------------------------------------------------------------
+
+int gds_destroy_qp(struct gds_qp *gqp)
+{
+        int retcode = 0;
+        int ret;
+
+        if (!gqp)
+                return retcode;
+
+        // Currently, we support only exp-verbs.
+        assert(gqp->dtype == GDS_DRIVER_TYPE_MLX5_EXP);
+
+        gds_mlx5_exp_qp_t *gmexpqp = to_gds_mexp_qp(gqp);
 
-        free(gqp);
+        retcode = gds_mlx5_exp_destroy_qp(gmexpqp);
 
         return retcode;
 }
diff --git a/src/mlx5-exp.cpp b/src/mlx5-exp.cpp
new file mode 100644
index 0000000..f76cf12
--- /dev/null
+++ b/src/mlx5-exp.cpp
@@ -0,0 +1,284 @@
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "mlx5-exp.hpp"
+#include "utils.hpp"
+
+static ibv_exp_res_domain *gds_mlx5_exp_create_res_domain(struct ibv_context *context)
+{
+        if (!context) {
+                gds_err("invalid context");
+                return NULL;
+        }
+
+        ibv_exp_res_domain_init_attr res_domain_attr;
+        memset(&res_domain_attr, 0, sizeof(res_domain_attr));
+
+        res_domain_attr.comp_mask |= IBV_EXP_RES_DOMAIN_THREAD_MODEL;
+        res_domain_attr.thread_model = IBV_EXP_THREAD_SINGLE;
+
+        ibv_exp_res_domain *res_domain = ibv_exp_create_res_domain(context, &res_domain_attr);
+        if (!res_domain) {
+                gds_warn("Can't create resource domain\n");
+        }
+
+        return res_domain;
+}
+
+//-----------------------------------------------------------------------------
+
+gds_mlx5_exp_cq_t *gds_mlx5_exp_create_cq(
+        struct ibv_context *context, int cqe,
+        void *cq_context, struct ibv_comp_channel *channel,
+        int comp_vector, gds_peer *peer, gds_peer_attr *peer_attr, gds_alloc_cq_flags_t flags,
+        struct ibv_exp_res_domain *res_domain)
+{
+        gds_mlx5_exp_cq_t *gmexpcq = NULL;
+        ibv_exp_cq_init_attr attr;
+        int ret = 0;
+
+        assert(context);
+        assert(peer);
+        assert(peer_attr);
+
+        gmexpcq = (gds_mlx5_exp_cq_t *)calloc(1, sizeof(gds_mlx5_exp_cq_t));
+        if (!gmexpcq) {
+                gds_err("cannot allocate memory\n");
+                return NULL;
+        }
+
+        peer->alloc_type = gds_peer::CQ;
+        peer->alloc_flags = flags;
+
+        attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_PEER_DIRECT;
+        attr.flags = 0; // see ibv_exp_cq_create_flags
+        attr.peer_direct_attrs = peer_attr;
+        if (res_domain) {
+                gds_dbg("using peer->res_domain %p for CQ\n", res_domain);
+                attr.res_domain = res_domain;
+                attr.comp_mask |= IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN;
+                gmexpcq->res_domain = res_domain;
+        }
+
+        int old_errno = errno;
+        gmexpcq->gcq.cq = ibv_exp_create_cq(context, cqe, cq_context, channel, comp_vector, &attr);
+        if (!gmexpcq->gcq.cq) {
+                gds_err("error %d in ibv_exp_create_cq, old errno %d\n", errno, old_errno);
+                return NULL;
+        }
+
+        gmexpcq->gcq.dtype = GDS_DRIVER_TYPE_MLX5_EXP;
+
+        return gmexpcq;
+}
+
+//-----------------------------------------------------------------------------
+
+gds_mlx5_exp_qp_t *gds_mlx5_exp_create_qp(
+        struct ibv_pd *pd, struct ibv_context *context, gds_qp_init_attr_t *qp_attr,
+        gds_peer *peer, gds_peer_attr *peer_attr, int flags)
+{
+        int ret = 0;
+        gds_mlx5_exp_qp_t *gmexpqp = NULL;
+        struct ibv_qp *qp = NULL;
+        gds_mlx5_exp_cq_t *rx_gmexpcq = NULL, *tx_gmexpcq = NULL;
+        struct ibv_exp_qp_init_attr exp_qp_attr = {0,};
+        int old_errno = errno;
+
+        assert(pd);
+        assert(context);
+        assert(qp_attr);
+        assert(peer);
+        assert(peer_attr);
+
+        gmexpqp = (gds_mlx5_exp_qp_t *)calloc(1, sizeof(gds_mlx5_exp_qp_t));
+        if (!gmexpqp) {
+                gds_err("cannot allocate memory\n");
+                return NULL;
+        }
+        gmexpqp->gqp.dtype = GDS_DRIVER_TYPE_MLX5_EXP;
+
+        gmexpqp->gqp.dev_context = context;
+
+        gmexpqp->res_domain = gds_mlx5_exp_create_res_domain(context);
+        if (gmexpqp->res_domain)
+                gds_dbg("using res_domain %p\n", gmexpqp->res_domain);
+        else
+                gds_warn("NOT using res_domain\n");
+
+        tx_gmexpcq = gds_mlx5_exp_create_cq(
+                context, qp_attr->cap.max_send_wr, NULL, NULL, 0, peer, peer_attr,
+                (flags & GDS_CREATE_QP_TX_CQ_ON_GPU) ? GDS_ALLOC_CQ_ON_GPU : GDS_ALLOC_CQ_DEFAULT,
+                gmexpqp->res_domain
+        );
+        if (!tx_gmexpcq) {
+                ret = errno;
+                gds_err("error %d while creating TX CQ, old_errno=%d\n", ret, old_errno);
+                goto err;
+        }
+
+        rx_gmexpcq = gds_mlx5_exp_create_cq(
+                context, qp_attr->cap.max_recv_wr, NULL, NULL, 0, peer, peer_attr,
+                (flags & GDS_CREATE_QP_RX_CQ_ON_GPU) ? GDS_ALLOC_CQ_ON_GPU : GDS_ALLOC_CQ_DEFAULT,
+                gmexpqp->res_domain
+        );
+        if (!rx_gmexpcq) {
+                ret = errno;
+                gds_err("error %d while creating RX CQ\n", ret);
+                goto err;
+        }
+
+        // peer registration
+        peer->alloc_type = gds_peer::WQ;
+        peer->alloc_flags = GDS_ALLOC_WQ_DEFAULT | GDS_ALLOC_DBREC_DEFAULT;
+        if (flags & GDS_CREATE_QP_WQ_ON_GPU) {
+                gds_err("error, QP WQ on GPU is not supported yet\n");
+                goto err;
+        }
+        if (flags & GDS_CREATE_QP_WQ_DBREC_ON_GPU) {
+                gds_warn("QP WQ DBREC on GPU\n");
+                peer->alloc_flags |= GDS_ALLOC_DBREC_ON_GPU;
+        }
+
+        exp_qp_attr.send_cq = tx_gmexpcq->gcq.cq;
+        exp_qp_attr.recv_cq = rx_gmexpcq->gcq.cq;
+        exp_qp_attr.pd = pd;
+        exp_qp_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_PEER_DIRECT;
+        exp_qp_attr.peer_direct_attrs = peer_attr;
+        exp_qp_attr.qp_type = qp_attr->qp_type;
+
+        assert(sizeof(exp_qp_attr.cap) == sizeof(qp_attr->cap));
+
+        memcpy(&exp_qp_attr.cap, &qp_attr->cap, sizeof(qp_attr->cap));
+
+        qp = ibv_exp_create_qp(context, &exp_qp_attr);
+        if (!qp) {
+                ret = EINVAL;
+                gds_err("error in ibv_exp_create_qp\n");
+                goto err;
+        }
+
+        tx_gmexpcq->gcq.cq = qp->send_cq;
+        rx_gmexpcq->gcq.cq = qp->recv_cq;
+
+        gmexpqp->gqp.qp = qp;
+        gmexpqp->gqp.send_cq = &tx_gmexpcq->gcq;
+        gmexpqp->gqp.recv_cq = &rx_gmexpcq->gcq;
+
+        gds_dbg("created gds_mlx5_exp_qp=%p\n", gmexpqp);
+
+        return gmexpqp;
+
+err:
+        gds_dbg("destroying QP\n");
+        gds_mlx5_exp_destroy_qp(gmexpqp);
+
+        return NULL;
+}
+
+//-----------------------------------------------------------------------------
+
+int gds_mlx5_exp_destroy_qp(gds_mlx5_exp_qp_t *gmexpqp)
+{
+        int retcode = 0;
+        int ret;
+
+        if (!gmexpqp)
+                return retcode;
+
+        assert(gmexpqp->gqp.dtype == GDS_DRIVER_TYPE_MLX5_EXP);
+
+        if (gmexpqp->gqp.qp) {
+                ret = ibv_destroy_qp(gmexpqp->gqp.qp);
+                if (ret) {
+                        gds_err("error %d in destroy_qp\n", ret);
+                        retcode = ret;
+                }
+        }
+
+        if (gmexpqp->gqp.send_cq) {
+                ret = gds_destroy_cq(gmexpqp->gqp.send_cq);
+                if (ret) {
+                        gds_err("error %d in destroy_cq send_cq\n", ret);
+                        retcode = ret;
+                }
+        }
+
+        if (gmexpqp->gqp.recv_cq) {
+                ret = gds_destroy_cq(gmexpqp->gqp.recv_cq);
+                if (ret) {
+                        gds_err("error %d in destroy_cq recv_cq\n", ret);
+                        retcode = ret;
+                }
+        }
+
+        if (gmexpqp->res_domain) {
+                struct ibv_exp_destroy_res_domain_attr attr = {0,}; //IBV_EXP_DESTROY_RES_DOMAIN_RESERVED
+                ret = ibv_exp_destroy_res_domain(gmexpqp->gqp.dev_context, gmexpqp->res_domain, &attr);
+                if (ret) {
+                        gds_err("ibv_exp_destroy_res_domain error %d: %s\n", ret, strerror(ret));
+                        retcode = ret;
+                }
+        }
+
+        free(gmexpqp);
+
+        return retcode;
+}
+
+//-----------------------------------------------------------------------------
+
+int gds_mlx5_exp_destroy_cq(gds_mlx5_exp_cq_t *gmexpcq)
+{
+        int retcode = 0;
+        int ret;
+
+        if (!gmexpcq)
+                return retcode;
+
+        assert(gmexpcq->gcq.dtype == GDS_DRIVER_TYPE_MLX5_EXP);
+
+        if (gmexpcq->gcq.cq) {
+                ret = ibv_destroy_cq(gmexpcq->gcq.cq);
+                if (ret) {
+                        gds_err("error %d in destroy_cq\n", ret);
+                        retcode = ret;
+                }
+        }
+
+        // res_domain will be destroyed in gds_mlx5_exp_destroy_qp.
+
+        free(gmexpcq);
+
+        return retcode;
+}
+
+//-----------------------------------------------------------------------------
+
+int gds_mlx5_exp_prepare_send(gds_mlx5_exp_qp_t *gmexpqp, gds_send_wr *p_ewr,
+                              gds_send_wr **bad_ewr,
+                              gds_send_request_t *request)
+{
+        int ret = 0;
+        ret = ibv_post_send(gmexpqp->gqp.qp, p_ewr, bad_ewr);
+        if (ret) {
+
+                if (ret == ENOMEM) {
+                        // out of space error can happen too often to report
+                        gds_dbg("ENOMEM error %d in ibv_post_send\n", ret);
+                } else {
+                        gds_err("error %d in ibv_post_send\n", ret);
+                }
+                goto out;
+        }
+
+        ret = ibv_exp_peer_commit_qp(gmexpqp->gqp.qp, &request->commit);
+        if (ret) {
+                gds_err("error %d in ibv_exp_peer_commit_qp\n", ret);
+                goto out;
+        }
+out:
+        return ret;
+}
diff --git a/src/mlx5-exp.hpp b/src/mlx5-exp.hpp
new file mode 100644
index 0000000..d289c83
--- /dev/null
+++ b/src/mlx5-exp.hpp
@@ -0,0 +1,48 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <infiniband/verbs.h>
+#include <infiniband/verbs_exp.h>
+
+#include <gdsync.h>
+
+#include "objs.hpp"
+#include "utils.hpp"
+
+typedef struct gds_mlx5_exp_cq {
+        gds_cq_t gcq;
+        ibv_exp_res_domain *res_domain;
+} gds_mlx5_exp_cq_t;
+
+typedef struct gds_mlx5_exp_qp {
+        gds_qp_t gqp;
+        ibv_exp_res_domain *res_domain;
+} gds_mlx5_exp_qp_t;
+
+static inline gds_mlx5_exp_cq_t *to_gds_mexp_cq(gds_cq_t *gcq) {
+        assert(gcq->dtype == GDS_DRIVER_TYPE_MLX5_EXP);
+        return container_of(gcq, gds_mlx5_exp_cq_t, gcq);
+}
+
+static inline gds_mlx5_exp_qp_t *to_gds_mexp_qp(gds_qp_t *gqp) {
+        assert(gqp->dtype == GDS_DRIVER_TYPE_MLX5_EXP);
+        return container_of(gqp, gds_mlx5_exp_qp_t, gqp);
+}
+
+gds_mlx5_exp_cq_t *gds_mlx5_exp_create_cq(
+        struct ibv_context *context, int cqe,
+        void *cq_context, struct ibv_comp_channel *channel,
+        int comp_vector, gds_peer *peer, gds_peer_attr *peer_attr, gds_alloc_cq_flags_t flags,
+        struct ibv_exp_res_domain *res_domain);
+
+gds_mlx5_exp_qp_t *gds_mlx5_exp_create_qp(
+        struct ibv_pd *pd, struct ibv_context *context, gds_qp_init_attr_t *qp_attr,
+        gds_peer *peer, gds_peer_attr *peer_attr, int flags);
+
+int gds_mlx5_exp_destroy_cq(gds_mlx5_exp_cq_t *gmexpcq);
+int gds_mlx5_exp_destroy_qp(gds_mlx5_exp_qp_t *gmexpqp);
+
+int gds_mlx5_exp_prepare_send(gds_mlx5_exp_qp_t *gmexpqp, gds_send_wr *p_ewr,
+                              gds_send_wr **bad_ewr,
+                              gds_send_request_t *request);
diff --git a/src/utils.hpp b/src/utils.hpp
index b501bda..1bacbb6 100644
--- a/src/utils.hpp
+++ b/src/utils.hpp
@@ -222,6 +222,26 @@ gds_peer *peer_from_stream(CUstream stream);
 
 //-----------------------------------------------------------------------------
 
+/* \brief: Get the underlying driver associated with the ibdev.
+ *
+ */
+static inline gds_driver_type gds_get_driver_type(struct ibv_device *ibdev)
+{
+        const char *dev_name = ibv_get_device_name(ibdev);
+
+        // Heuristically guess the driver by the device name.
+        // Until we find a better way to do so...
+        if (strstr(dev_name, "mlx5") != NULL)
+                return GDS_DRIVER_TYPE_MLX5_EXP;
+        return GDS_DRIVER_TYPE_UNSUPPORTED;
+}
+
+//-----------------------------------------------------------------------------
+
+int gds_destroy_cq(struct gds_cq *gcq);
+
+//-----------------------------------------------------------------------------
+
 /*
  * Local variables:
  * c-indent-level: 8
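The heuristic above keys purely off the device name. A small standalone probe using the same strstr() test and only stock rdma-core calls, handy for checking what gds_get_driver_type() would decide on a given host; this is illustrative and not part of the patch:

    #include <stdio.h>
    #include <string.h>
    #include <infiniband/verbs.h>

    int main(void)
    {
            int n = 0;
            struct ibv_device **devs = ibv_get_device_list(&n);
            if (!devs)
                    return 1;
            for (int i = 0; i < n; ++i) {
                    const char *name = ibv_get_device_name(devs[i]);
                    /* Same test as gds_get_driver_type(). */
                    printf("%s -> %s\n", name,
                           strstr(name, "mlx5") ? "mlx5 (exp-verbs path)"
                                                : "unsupported");
            }
            ibv_free_device_list(devs);
            return 0;
    }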
diff --git a/tests/gds_kernel_latency.c b/tests/gds_kernel_latency.c
index 63875bf..04370f5 100644
--- a/tests/gds_kernel_latency.c
+++ b/tests/gds_kernel_latency.c
@@ -495,7 +495,7 @@ static int pp_wait_cq(struct pingpong_context *ctx, int is_client)
 {
         int ret;
         if (ctx->peersync) {
-                ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
+                ret = gds_stream_wait_cq(gpu_stream, ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
         } else {
                 if (is_client) {
                         do {
@@ -542,23 +542,22 @@ static int pp_post_gpu_send(struct pingpong_context *ctx, uint32_t qpn, CUstream
                 .wr_id      = PINGPONG_SEND_WRID,
                 .sg_list    = &list,
                 .num_sge    = 1,
-                .exp_opcode = IBV_EXP_WR_SEND,
-                .exp_send_flags = IBV_EXP_SEND_SIGNALED,
+                .opcode     = IBV_WR_SEND,
+                .send_flags = IBV_SEND_SIGNALED,
                 .wr         = {
                         .ud = {
                                  .ah          = ctx->ah,
                                  .remote_qpn  = qpn,
                                  .remote_qkey = 0x11111111
                          }
-                },
-                .comp_mask = 0
+                }
         };
 #if 0
         if (IBV_QPT_UD != gds_qpt) {
                 memset(&ewr, 0, sizeof(ewr));
                 ewr.num_sge = 1;
-                ewr.exp_send_flags = IBV_EXP_SEND_SIGNALED;
-                ewr.exp_opcode = IBV_EXP_WR_SEND;
+                ewr.send_flags = IBV_SEND_SIGNALED;
+                ewr.opcode = IBV_WR_SEND;
                 ewr.wr_id = PINGPONG_SEND_WRID;
                 ewr.sg_list = &list;
                 ewr.next = NULL;
@@ -580,23 +579,22 @@ static int pp_prepare_gpu_send(struct pingpong_context *ctx, uint32_t qpn, gds_s
                 .wr_id      = PINGPONG_SEND_WRID,
                 .sg_list    = &list,
                 .num_sge    = 1,
-                .exp_opcode = IBV_EXP_WR_SEND,
-                .exp_send_flags = IBV_EXP_SEND_SIGNALED,
+                .opcode     = IBV_WR_SEND,
+                .send_flags = IBV_SEND_SIGNALED,
                 .wr         = {
                         .ud = {
                                  .ah          = ctx->ah,
                                  .remote_qpn  = qpn,
                                  .remote_qkey = 0x11111111
                          }
-                },
-                .comp_mask = 0
+                }
         };
 
         if (IBV_QPT_UD != gds_qpt) {
                 memset(&ewr, 0, sizeof(ewr));
                 ewr.num_sge = 1;
-                ewr.exp_send_flags = IBV_EXP_SEND_SIGNALED;
-                ewr.exp_opcode = IBV_EXP_WR_SEND;
+                ewr.send_flags = IBV_SEND_SIGNALED;
+                ewr.opcode = IBV_WR_SEND;
                 ewr.wr_id = PINGPONG_SEND_WRID;
                 ewr.sg_list = &list;
                 ewr.next = NULL;
@@ -676,7 +674,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                         wdesc->descs[k].tag = GDS_TAG_SEND;
                         wdesc->descs[k].send = &wdesc->send_rq;
                         ++k;
-                        ret = gds_prepare_wait_cq(&ctx->gds_qp->send_cq, &wdesc->wait_tx_rq, 0);
+                        ret = gds_prepare_wait_cq(ctx->gds_qp->send_cq, &wdesc->wait_tx_rq, 0);
                         if (ret) {
                                 retcode = -ret;
                                 break;
@@ -685,7 +683,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                         wdesc->descs[k].tag = GDS_TAG_WAIT;
                         wdesc->descs[k].wait = &wdesc->wait_tx_rq;
                         ++k;
-                        ret = gds_prepare_wait_cq(&ctx->gds_qp->recv_cq, &wdesc->wait_rx_rq, 0);
+                        ret = gds_prepare_wait_cq(ctx->gds_qp->recv_cq, &wdesc->wait_rx_rq, 0);
                         if (ret) {
                                 retcode = -ret;
                                 break;
@@ -715,14 +713,14 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                                 retcode = -ret;
                                 break;
                         }
-                        ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->send_cq, 0);
+                        ret = gds_stream_wait_cq(gpu_stream, ctx->gds_qp->send_cq, 0);
                         if (ret) {
                                 // TODO: rollback gpu send
                                 gpu_err("error %d in gds_stream_wait_cq\n", ret);
                                 retcode = -ret;
                                 break;
                         }
-                        ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
+                        ret = gds_stream_wait_cq(gpu_stream, ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
                         if (ret) {
                                 // TODO: rollback gpu send and wait send_cq
                                 gpu_err("[%d] error %d in gds_stream_wait_cq\n", my_rank, ret);
@@ -751,7 +749,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                 if (ctx->use_desc_apis) {
                         work_desc_t *wdesc = calloc(1, sizeof(*wdesc));
                         int k = 0;
-                        ret = gds_prepare_wait_cq(&ctx->gds_qp->recv_cq, &wdesc->wait_rx_rq, 0);
+                        ret = gds_prepare_wait_cq(ctx->gds_qp->recv_cq, &wdesc->wait_rx_rq, 0);
                         if (ret) {
                                 retcode = -ret;
                                 break;
@@ -773,7 +771,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                                 CUCHECK(cuStreamAddCallback(gpu_stream, post_work_cb, wdesc, 0));
                         }
                 } else if (ctx->peersync) {
-                        ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
+                        ret = gds_stream_wait_cq(gpu_stream, ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
                         if (ret) {
                                 // TODO: rollback gpu send and wait send_cq
                                 gpu_err("error %d in gds_stream_wait_cq\n", ret);
@@ -806,7 +804,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                         wdesc->descs[k].tag = GDS_TAG_SEND;
                         wdesc->descs[k].send = &wdesc->send_rq;
                         ++k;
-                        ret = gds_prepare_wait_cq(&ctx->gds_qp->send_cq, &wdesc->wait_tx_rq, 0);
+                        ret = gds_prepare_wait_cq(ctx->gds_qp->send_cq, &wdesc->wait_tx_rq, 0);
                         if (ret) {
                                 retcode = -ret;
                                 break;
@@ -835,7 +833,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                                 retcode = -ret;
                                 break;
                         }
-                        ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->send_cq, 0);
+                        ret = gds_stream_wait_cq(gpu_stream, ctx->gds_qp->send_cq, 0);
                         if (ret) {
                                 // TODO: rollback gpu send
                                 gpu_err("error %d in gds_stream_wait_cq\n", ret);
diff --git a/tests/gds_kernel_loopback_latency.c b/tests/gds_kernel_loopback_latency.c
index b2d209c..f6ccc32 100644
--- a/tests/gds_kernel_loopback_latency.c
+++ b/tests/gds_kernel_loopback_latency.c
@@ -511,16 +511,15 @@ static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn)
                 .wr_id      = PINGPONG_SEND_WRID,
                 .sg_list    = &list,
                 .num_sge    = 1,
-                .exp_opcode = IBV_EXP_WR_SEND,
-                .exp_send_flags = IBV_EXP_SEND_SIGNALED,
+                .opcode     = IBV_WR_SEND,
+                .send_flags = IBV_SEND_SIGNALED,
                 .wr         = {
                         .ud = {
                                  .ah          = ctx->ah,
                                  .remote_qpn  = qpn,
                                  .remote_qkey = 0x11111111
                          }
-                },
-                .comp_mask = 0
+                }
         };
         gds_send_wr *bad_ewr;
         return gds_post_send(ctx->gds_qp, &ewr, &bad_ewr);
@@ -538,16 +537,15 @@ static int pp_post_gpu_send(struct pingpong_context *ctx, uint32_t qpn, CUstream
                 .wr_id      = PINGPONG_SEND_WRID,
                 .sg_list    = &list,
                 .num_sge    = 1,
-                .exp_opcode = IBV_EXP_WR_SEND,
-                .exp_send_flags = IBV_EXP_SEND_SIGNALED,
+                .opcode     = IBV_WR_SEND,
+                .send_flags = IBV_SEND_SIGNALED,
                 .wr         = {
                         .ud = {
                                  .ah          = ctx->ah,
                                  .remote_qpn  = qpn,
                                  .remote_qkey = 0x11111111
                          }
-                },
-                .comp_mask = 0
+                }
         };
         gds_send_wr *bad_ewr;
         return gds_stream_queue_send(*p_gpu_stream, ctx->gds_qp, &ewr, &bad_ewr);
@@ -565,16 +563,15 @@ static int pp_prepare_gpu_send(struct pingpong_context *ctx, uint32_t qpn, gds_s
                 .wr_id      = PINGPONG_SEND_WRID,
                 .sg_list    = &list,
                 .num_sge    = 1,
-                .exp_opcode = IBV_EXP_WR_SEND,
-                .exp_send_flags = IBV_EXP_SEND_SIGNALED,
+                .opcode     = IBV_WR_SEND,
+                .send_flags = IBV_SEND_SIGNALED,
                 .wr         = {
                         .ud = {
                                  .ah          = ctx->ah,
                                  .remote_qpn  = qpn,
                                  .remote_qkey = 0x11111111
                          }
-                },
-                .comp_mask = 0
+                }
         };
         gds_send_wr *bad_ewr;
         //printf("gpu_post_send_on_stream\n");
@@ -655,7 +652,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                         wdesc->descs[k].send = &wdesc->send_rq;
                         ++k;
 
-                        ret = gds_prepare_wait_cq(&ctx->gds_qp->send_cq, &wdesc->wait_tx_rq, 0);
+                        ret = gds_prepare_wait_cq(ctx->gds_qp->send_cq, &wdesc->wait_tx_rq, 0);
                         if (ret) {
                                 retcode = -ret;
                                 break;
@@ -665,7 +662,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                         wdesc->descs[k].wait = &wdesc->wait_tx_rq;
                         ++k;
 
-                        ret = gds_prepare_wait_cq(&ctx->gds_qp->recv_cq, &wdesc->wait_rx_rq, 0);
+                        ret = gds_prepare_wait_cq(ctx->gds_qp->recv_cq, &wdesc->wait_rx_rq, 0);
                         if (ret) {
                                 retcode = -ret;
                                 break;
@@ -697,7 +694,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                                 break;
                         }
 
-                        ret = gds_stream_wait_cq(gpu_stream_server, &ctx->gds_qp->send_cq, 0);
+                        ret = gds_stream_wait_cq(gpu_stream_server, ctx->gds_qp->send_cq, 0);
                         if (ret) {
                                 // TODO: rollback gpu send
                                 gpu_err("error %d in gds_stream_wait_cq\n", ret);
@@ -705,7 +702,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
                                 break;
                         }
 
-                        ret = gds_stream_wait_cq(gpu_stream_server, &ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
+                        ret = gds_stream_wait_cq(gpu_stream_server, ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
                         if (ret) {
                                 // TODO: rollback gpu send and wait send_cq
                                 gpu_err("error %d in gds_stream_wait_cq\n", ret);
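All of the test-side edits above are the same mechanical change: send_cq/recv_cq moved from embedded structs to pointers inside struct gds_qp, so the &ctx->gds_qp->send_cq spelling loses its &. A hedged end-to-end sketch of the refactored lifecycle, using only calls that appear in this patch; pd, context, the QP attributes and the CUDA stream are assumed to be set up elsewhere, and only mlx5 exp-verbs devices will succeed:

    #include <cuda.h>
    #include <gdsync.h>

    static int qp_roundtrip(struct ibv_pd *pd, struct ibv_context *ctx,
                            gds_qp_init_attr_t *attr, CUstream stream,
                            int gpu_id)
    {
            struct gds_qp *qp = gds_create_qp(pd, ctx, attr, gpu_id,
                                              GDS_CREATE_QP_DEFAULT);
            if (!qp)
                    return -1;

            /* CQs are now pointer members: no more &qp->recv_cq. */
            int ret = gds_stream_wait_cq(stream, qp->recv_cq, 0);

            /* gds_destroy_qp() also tears down both CQs and the
             * exp-verbs resource domain behind the scenes. */
            int rc = gds_destroy_qp(qp);
            return ret ? ret : rc;
    }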