From ce78eb62a3684355829e159943235fc2c045b3ca Mon Sep 17 00:00:00 2001
From: Pak Markthub
Date: Wed, 4 Aug 2021 01:20:19 -0400
Subject: [PATCH 1/3] Added --with-spectrum-mpi to configure

Add a --with-spectrum-mpi=DIR option next to --with-mpi.

The MPI link flag is now carried in MPILDFLAGS: -lmpi_ibm when
--with-spectrum-mpi is used, -lmpi otherwise.  MPILDFLAGS is exported
with AC_SUBST and replaces the hard-coded -lmpi in the LDADD of
gds_kernel_latency, gds_poll_lat and gds_sanity.

Passing both --with-mpi and --with-spectrum-mpi aborts configure with
AC_MSG_ERROR.
---
Review notes (this region is ignored by git am):
 - `test x$with_spectrum_mpi == xno` now uses `=`.  `==` is not a
   portable `test` operator and breaks configure when /bin/sh is not
   bash; every other comparison in this hunk already uses `=`.
 - The default (no path given) branches now set MPICC/MPICXX from
   ${mpi_home} instead of the literal /bin/..., so they agree with the
   `mpi_home=/usr` assignment directly above them and with the
   explicit-path branches.
 - Only `+` lines were altered, so hunk sizes and the diffstat below are
   unchanged.

 Makefile.am  |  6 +++---
 configure.ac | 44 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index a66dfda..ac9a0cc 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -36,7 +36,7 @@ bin_PROGRAMS = tests/gds_kernel_latency tests/gds_poll_lat tests/gds_kernel_loop
 noinst_PROGRAMS = tests/rstest tests/wqtest
 
 tests_gds_kernel_latency_SOURCES = tests/gds_kernel_latency.c tests/gpu_kernels.cu tests/pingpong.c tests/gpu.cpp
-tests_gds_kernel_latency_LDADD = $(top_builddir)/src/libgdsync.la -lmpi $(LIBGDSTOOLS) -lgdrapi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
+tests_gds_kernel_latency_LDADD = $(top_builddir)/src/libgdsync.la $(MPILDFLAGS) $(LIBGDSTOOLS) -lgdrapi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
 
 tests_rstest_SOURCES = tests/rstest.cpp
 tests_rstest_LDADD =
@@ -45,10 +45,10 @@ tests_wqtest_SOURCES = tests/task_queue_test.cpp
 tests_wqtest_LDADD = $(PTHREAD_LIBS)
 
 tests_gds_poll_lat_SOURCES = tests/gds_poll_lat.c tests/gpu.cpp tests/gpu_kernels.cu
-tests_gds_poll_lat_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi -lmpi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
+tests_gds_poll_lat_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi $(MPILDFLAGS) $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
 
 tests_gds_sanity_SOURCES = tests/gds_sanity.cpp tests/gpu.cpp tests/gpu_kernels.cu
-tests_gds_sanity_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi -lmpi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
+tests_gds_sanity_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi $(MPILDFLAGS) $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
 
 tests_gds_kernel_loopback_latency_SOURCES = tests/gds_kernel_loopback_latency.c tests/pingpong.c tests/gpu.cpp tests/gpu_kernels.cu
 tests_gds_kernel_loopback_latency_LDADD = $(top_builddir)/src/libgdsync.la $(LIBGDSTOOLS) -lgdrapi $(LIBNVTX) -lcuda -lcudart $(PTHREAD_LIBS)
diff --git a/configure.ac b/configure.ac
index a79aed6..e20f313 100644
--- a/configure.ac
+++ b/configure.ac
@@ -93,25 +93,54 @@ else
     AC_SUBST(LIBGDSTOOLS)
 fi
 
-AC_ARG_WITH([mpi],
-    AC_HELP_STRING([--with-mpi], [ Set path to mpi installation ]))
-if test x$with_mpi = x || test x$with_mpi = xno; then
+AC_ARG_WITH([spectrum-mpi],
+    AC_HELP_STRING([--with-spectrum-mpi], [ Set path to Spectrum MPI installation ]))
+if test x$with_spectrum_mpi = x || test x$with_spectrum_mpi = xno; then
     # assuming system location
     mpi_home=/usr
-    MPICC=$with_home/bin/mpicc
-    MPICXX=$with_home/bin/mpic++
+    MPICC=${mpi_home}/bin/mpicc
+    MPICXX=${mpi_home}/bin/mpic++
+    MPILDFLAGS="-lmpi_ibm"
 else
-    if test -d $with_mpi; then
-        mpi_home=$with_mpi
+    if test -d $with_spectrum_mpi; then
+        mpi_home=$with_spectrum_mpi
         MPICC=${mpi_home}/bin/mpicc
         MPICXX=${mpi_home}/bin/mpic++
         CPPFLAGS="$CPPFLAGS -I${mpi_home}/include"
         LDFLAGS="$LDFLAGS -L${mpi_home}/lib -L${mpi_home}/lib64"
+        MPILDFLAGS="-lmpi_ibm"
     else
         echo "MPI dir does not exist"
     fi
 fi
 
+AC_ARG_WITH([mpi],
+    AC_HELP_STRING([--with-mpi], [ Set path to MPI installation ]))
+if test x$with_spectrum_mpi = x || test x$with_spectrum_mpi = xno; then
+    if test x$with_mpi = x || test x$with_mpi = xno; then
+        # assuming system location
+        mpi_home=/usr
+        MPICC=${mpi_home}/bin/mpicc
+        MPICXX=${mpi_home}/bin/mpic++
+        MPILDFLAGS="-lmpi"
+    else
+        if test -d $with_mpi; then
+            mpi_home=$with_mpi
+            MPICC=${mpi_home}/bin/mpicc
+            MPICXX=${mpi_home}/bin/mpic++
+            CPPFLAGS="$CPPFLAGS -I${mpi_home}/include"
+            LDFLAGS="$LDFLAGS -L${mpi_home}/lib -L${mpi_home}/lib64"
+            MPILDFLAGS="-lmpi"
+        else
+            echo "MPI dir does not exist"
+        fi
+    fi
+fi
+
+if test x$with_spectrum_mpi != x && test x$with_spectrum_mpi != xno && test x$with_mpi != x && test x$with_mpi != xno; then
+    AC_MSG_ERROR([--with-mpi and --with-spectrum-mpi are mutually exclusive.])
+fi
+
 dnl Specify CUDA Location
 AC_ARG_WITH(cuda-toolkit,
     AC_HELP_STRING([--with-cuda-toolkit=CUDATKDIR], [ Specify CUDA toolkit installation directory (default: /usr/local/cuda)]),
@@ -186,6 +215,7 @@ AC_MSG_NOTICE([Setting MPI_PATH = ${mpi_home} ])
 AC_SUBST( MPI_PATH, [${mpi_home} ])
 AC_SUBST( MPICC, [${MPICC} ])
 AC_SUBST( MPICXX, [${MPICXX} ])
+AC_SUBST( MPILDFLAGS, [${MPILDFLAGS} ])
 
 CPPFLAGS="$CPPFLAGS -I$CUDA_DRV_PATH/include -I$CUDA_PATH/include"
 LDFLAGS="$LDFLAGS -L$CUDA_DRV_PATH/lib64 -L$CUDA_DRV_PATH/lib -L$CUDA_PATH/lib64 -L$CUDA_PATH/lib"

From 66409ac64785c884889e3d85df10a3a34bc2e8f9 Mon Sep 17 00:00:00 2001
From: Pak Markthub
Date: Wed, 4 Aug 2021 21:42:55 -0400
Subject: [PATCH 2/3] Introduced GDS_DRIVER_TYPE to gds_qp and gds_cq

Add the gds_driver_type_t enumeration (GDS_DRIVER_TYPE_MLX5_EXP,
GDS_DRIVER_TYPE_MLX5_DV, GDS_DRIVER_TYPE_MLX5_DEVX) and a `dtype` member
of that type to struct gds_cq and struct gds_qp.

The enumerators of gds_param and gds_create_qp_flags are re-indented;
that part of the change is whitespace only.
---
 include/gdsync/core.h | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/gdsync/core.h b/include/gdsync/core.h
index 7ff0cbb..87a4cc3 100644
--- a/include/gdsync/core.h
+++ b/include/gdsync/core.h
@@ -40,26 +40,33 @@
       ((((v) & 0x0000ffffU) >> 0 ) >= (unsigned)GDS_API_MINOR_VERSION) )
 
 typedef enum gds_param {
-    GDS_PARAM_VERSION,
-    GDS_NUM_PARAMS
+        GDS_PARAM_VERSION,
+        GDS_NUM_PARAMS
 } gds_param_t;
 
 int gds_query_param(gds_param_t param, int *value);
 
 enum gds_create_qp_flags {
-    GDS_CREATE_QP_DEFAULT = 0,
-    GDS_CREATE_QP_WQ_ON_GPU = 1<<0,
-    GDS_CREATE_QP_TX_CQ_ON_GPU = 1<<1,
-    GDS_CREATE_QP_RX_CQ_ON_GPU = 1<<2,
-    GDS_CREATE_QP_WQ_DBREC_ON_GPU = 1<<5,
+        GDS_CREATE_QP_DEFAULT = 0,
+        GDS_CREATE_QP_WQ_ON_GPU = 1<<0,
+        GDS_CREATE_QP_TX_CQ_ON_GPU = 1<<1,
+        GDS_CREATE_QP_RX_CQ_ON_GPU = 1<<2,
+        GDS_CREATE_QP_WQ_DBREC_ON_GPU = 1<<5,
 };
 
 typedef struct ibv_exp_qp_init_attr gds_qp_init_attr_t;
 typedef struct ibv_exp_send_wr gds_send_wr;
 
+typedef enum gds_driver_type {
+        GDS_DRIVER_TYPE_MLX5_EXP = 0,
+        GDS_DRIVER_TYPE_MLX5_DV,
+        GDS_DRIVER_TYPE_MLX5_DEVX
+} gds_driver_type_t;
+
 struct gds_cq {
         struct ibv_cq *cq;
         uint32_t curr_offset;
+        gds_driver_type_t dtype;
 };
 
 struct gds_qp {
@@ -68,6 +75,7 @@ struct gds_qp {
         struct gds_cq recv_cq;
         struct ibv_exp_res_domain * res_domain;
         struct ibv_context *dev_context;
+        gds_driver_type_t dtype;
 };
 
 /* \brief: Create a peer-enabled QP attached to the specified GPU id.

From fdd2270a1a2736657c6385e75d0991b6146a6829 Mon Sep 17 00:00:00 2001
From: Pak Markthub
Date: Wed, 4 Aug 2021 21:43:15 -0400
Subject: [PATCH 3/3] Set gds_qp and gds_cq dtype to MLX5_EXP in the creation functions

gds_create_cq_internal() and gds_create_qp() now assign
GDS_DRIVER_TYPE_MLX5_EXP to the `dtype` member of the object they
return.
---
Review notes (this region is ignored by git am):
 - NOTE(review): in gds_create_qp() the embedded gqp->send_cq.dtype and
   gqp->recv_cq.dtype are not assigned in the visible hunk.  They are
   only correct if gqp is zero-initialised (GDS_DRIVER_TYPE_MLX5_EXP is
   0) -- confirm against the allocation, which is outside this hunk.

 src/gdsync.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gdsync.cpp b/src/gdsync.cpp
index 90d5508..e863ec0 100644
--- a/src/gdsync.cpp
+++ b/src/gdsync.cpp
@@ -1754,6 +1754,8 @@ gds_create_cq_internal(struct ibv_context *context, int cqe,
                 return NULL;
         }
 
+        gcq->dtype = GDS_DRIVER_TYPE_MLX5_EXP;
+
         return gcq;
 }
 
@@ -1893,6 +1895,7 @@ struct gds_qp *gds_create_qp(struct ibv_pd *pd, struct ibv_context *context,
         gqp->send_cq.curr_offset = 0;
         gqp->recv_cq.cq = qp->recv_cq;
         gqp->recv_cq.curr_offset = 0;
+        gqp->dtype = GDS_DRIVER_TYPE_MLX5_EXP;
 
         gds_dbg("created gds_qp=%p\n", gqp);
 