From a6f597b5ad002254348d91991f6e41a84cf099e2 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <63189531+kittywhiskers@users.noreply.github.com> Date: Sun, 15 Aug 2021 22:00:46 +0530 Subject: [PATCH 1/4] core: remove argon2d gpu miner logic --- build-aux/m4/ax_check_cuda.m4 | 153 ------------------------ configure.ac | 28 ----- src/Makefile.am | 24 +--- src/Makefile.dht.test.include | 4 - src/Makefile.gpu.include | 120 ------------------- src/Makefile.qt.include | 8 -- src/Makefile.qttest.include | 4 - src/Makefile.test.include | 4 - src/init.cpp | 12 +- src/{miner/miner-util.cpp => miner.cpp} | 2 +- src/{miner/miner-util.h => miner.h} | 0 src/qt/dynamicgui.cpp | 24 ---- src/qt/dynamicgui.h | 3 - src/qt/guiutil.cpp | 15 --- src/qt/guiutil.h | 2 +- src/qt/hashrategraphwidget.cpp | 16 +-- src/qt/hashrategraphwidget.h | 1 - src/qt/walletframe.cpp | 7 -- src/qt/walletframe.h | 2 - src/qt/walletview.cpp | 13 -- src/qt/walletview.h | 4 - src/rpc/mining.cpp | 89 +------------- src/test/miner_tests.cpp | 41 +------ src/test/test_dynamic.cpp | 2 +- src/test/txvalidationcache_tests.cpp | 2 +- 25 files changed, 10 insertions(+), 570 deletions(-) delete mode 100644 build-aux/m4/ax_check_cuda.m4 delete mode 100644 src/Makefile.gpu.include rename src/{miner/miner-util.cpp => miner.cpp} (99%) rename src/{miner/miner-util.h => miner.h} (100%) diff --git a/build-aux/m4/ax_check_cuda.m4 b/build-aux/m4/ax_check_cuda.m4 deleted file mode 100644 index ce123afae0..0000000000 --- a/build-aux/m4/ax_check_cuda.m4 +++ /dev/null @@ -1,153 +0,0 @@ -##### -# -# SYNOPSIS -# -# AX_CHECK_CUDA -# -# DESCRIPTION -# -# Figures out if CUDA Driver API/nvcc is available, i.e. existence of: -# nvcc -# cuda.h -# libcuda.a -# -# If something isn't found, fails straight away. -# -# The following variables are substituted in the makefile: -# NVCC : the nvcc compiler command. -# NVCCFLAGS : nvcc specific flags -# CUDA_CFLAGS : CUDA includes -# CUDA_LDLIBS : CUDA libraries -# -# Defines HAVE_CUDA in config.h -# -# LICENCE -# Public domain -# -##### - -AC_DEFUN([AX_CHECK_CUDA], [ - -# Provide your CUDA path with this -AC_ARG_WITH([cuda], - [AS_HELP_STRING([--with-cuda=PATH],[prefix where CUDA is installed @<:@default=no@:>@])], - [], - [with_cuda=yes]) - -NVCC=no -CUDA_CFLAGS= -CUDA_LDLIBS= - -if test "x$with_cuda" != "xno" -then - - # ----------------------------------------- - # Setup CUDA paths - # ----------------------------------------- - if test "x$with_cuda" != "xyes" - then - AX_NORMALIZE_PATH([with_cuda], ["/"]) - CUDAPATH="$with_cuda" - CUDA_CFLAGS+=" -I$with_cuda/include" - CUDA_LDLIBS+=" -L$with_cuda/lib64" - else - AC_CHECK_FILE(/usr/local/cuda/,[CUDAPATH="/usr/local/cuda"],[]) - AC_CHECK_FILE(/usr/local/cuda/include,[CUDA_CFLAGS+=" -I/usr/local/cuda/include"],[CUDA_CFLAGS=""]) - AC_CHECK_FILE(/usr/local/cuda/lib64,[CUDA_LDLIBS+=" -L/usr/local/cuda/lib64"],[]) - fi - CUDA_LDLIBS+=" -lcuda -lcudart -lcublas" - - - # ----------------------------------------- - # Checking for nvcc - # ----------------------------------------- - AC_PATH_PROG([NVCC],[nvcc],[no],[$PATH:$CUDAPATH/bin]) - if test "x$NVCC" = "xno" - then - AC_MSG_ERROR([Cannot find nvcc compiler. To enable CUDA, please add path to - nvcc in the PATH environment variable and/or specify the path - where CUDA is installed using: --with-cuda=PATH]) - fi - - - # ----------------------------------------- - # Setup nvcc flags - # ----------------------------------------- - AC_ARG_VAR(NVCCFLAGS,[Additional nvcc flags (example: NVCCFLAGS="-arch=compute_30 -code=sm_30")]) - if test x$DEBUG = xtrue - then - NVCCFLAGS+=" -g" - else - NVCCFLAGS+=" -O3" - fi - AC_ARG_ENABLE([emu], - AS_HELP_STRING([--enable-emu],[turn on device emulation for CUDA]), - [case "${enableval}" in - yes) EMULATION=true;; - no) EMULATION=false;; - *) AC_MSG_ERROR([bad value ${enableval} for --enable-emu]);; - esac], - [EMULATION=false]) - if test x$EMULATION = xtrue - then - NVCCFLAGS+=" -deviceemu" - fi - - - # ----------------------------------------- - # Check if nvcc works - # ----------------------------------------- - ac_compile_nvcc=no - AC_MSG_CHECKING([whether nvcc works]) - cat>conftest.cu< /dev/null - then - ac_compile_nvcc=yes - fi - rm -f conftest.cu conftest.o - AC_MSG_RESULT([$ac_compile_nvcc]) - - if test "x$ac_compile_nvcc" = "xno" - then - AC_MSG_ERROR([CUDA compiler has problems.]) - fi - - - # ----------------------------------------- - # Check for headers and libraries - # ----------------------------------------- - ax_save_CFLAGS="${CFLAGS}" - ax_save_LIBS="${LIBS}" - - CFLAGS="$CUDA_CFLAGS $CFLAGS" - LIBS="$CUDA_LDLIBS $LIBS" - - # And the header and the lib - AC_CHECK_HEADER([cuda.h], [], AC_MSG_FAILURE([Couldn't find cuda.h]), [#include ]) - AC_CHECK_HEADER([cuda_runtime_api.h], [], AC_MSG_FAILURE([Couldn't find cuda_runtime_api.h]), [#include ]) - AC_CHECK_HEADER([cublas.h], [], AC_MSG_FAILURE([Couldn't find cublas.h]), [#include ]) - AC_CHECK_LIB([cuda], [cuInit], [], AC_MSG_FAILURE([Couldn't find libcuda])) - AC_CHECK_LIB([cudart], [cudaMalloc], [], AC_MSG_FAILURE([Couldn't find libcudart])) - AC_CHECK_LIB([cublas], [cublasInit], [], AC_MSG_FAILURE([Couldn't find libcublas])) - - # Returning to the original flags - CFLAGS=${ax_save_CFLAGS} - LIBS=${ax_save_LIBS} - - AC_DEFINE(HAVE_CUDA,1,[Define if we have CUDA]) -fi - - -# Announcing the new variables -AC_SUBST([NVCC]) -AC_SUBST([NVCCFLAGS]) -AC_SUBST([CUDA_CFLAGS]) -AC_SUBST([CUDA_LDLIBS]) -]) diff --git a/configure.ac b/configure.ac index 890f69d2d4..3980fc0195 100644 --- a/configure.ac +++ b/configure.ac @@ -91,20 +91,6 @@ AC_ARG_ENABLE([wallet], [enable_wallet=$enableval], [enable_wallet=yes]) -# Enable GPU miner -AC_ARG_ENABLE([gpu], - [AS_HELP_STRING([--enable-gpu], - [enable GPU miner (disabled by default)])], - [enable_gpu=$enableval], - [enable_gpu=no]) - -# Enable CUDA -AC_ARG_ENABLE([cuda], - [AS_HELP_STRING([--enable-cuda], - [enable CUDA miner (defaults is no)])], - [enable_cuda=$enableval], - [enable_cuda=no]) - AC_ARG_WITH([miniupnpc], [AS_HELP_STRING([--with-miniupnpc], [enable UPNP (default is yes if libminiupnpc is found)])], @@ -963,18 +949,6 @@ else AC_MSG_RESULT(no) fi -if test x$enable_gpu = xyes; then - AC_DEFINE_UNQUOTED([ENABLE_GPU],[1],[Define to 1 to enable GPU miner]) - - if test "x$enable_cuda" = xyes; then - AX_CHECK_CUDA - if test "x$NVCC" = xno; then - enable_cuda=no - AC_MSG_WARN([NVIDIA CUDA nvcc compiler not found, falling back on OpenCL. Specify --with-cuda=/path/to/cuda or --disable-gpu]) - fi - fi -fi - dnl enable upnp support AC_MSG_CHECKING([whether to build with support for UPnP]) if test x$have_miniupnpc = xno; then @@ -1067,8 +1041,6 @@ fi AM_CONDITIONAL([TARGET_DARWIN], [test x$TARGET_OS = xdarwin]) AM_CONDITIONAL([BUILD_DARWIN], [test x$BUILD_OS = xdarwin]) AM_CONDITIONAL([TARGET_WINDOWS], [test x$TARGET_OS = xwindows]) -AM_CONDITIONAL([ENABLE_GPU],[test x$enable_gpu = xyes]) -AM_CONDITIONAL([ENABLE_CUDA], [test x"$enable_cuda" = xyes]) AM_CONDITIONAL([ENABLE_WALLET],[test x$enable_wallet = xyes]) AM_CONDITIONAL([ENABLE_TESTS],[test x$BUILD_TEST = xyes]) AM_CONDITIONAL([ENABLE_QT],[test x$dynamic_enable_qt = xyes]) diff --git a/src/Makefile.am b/src/Makefile.am index 1e99260b2e..e02301911a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -160,16 +160,7 @@ DYNAMIC_CORE_H = \ memusage.h \ merkleblock.h \ messagesigner.h \ - miner/impl/miner-cpu.h \ - miner/impl/miner-gpu.h \ - miner/internal/hash-rate-counter.h \ - miner/internal/miner-base.h \ - miner/internal/miner-context.h \ - miner/internal/miners-controller.h \ - miner/internal/miners-group.h \ - miner/internal/thread-group.h \ - miner/miner-util.h \ - miner/miner.h \ + miner.h \ net.h \ net_processing.h \ netaddress.h \ @@ -315,14 +306,7 @@ libdynamic_server_a_SOURCES = \ instantsend.cpp \ merkleblock.cpp \ messagesigner.cpp \ - miner/impl/miner-cpu.cpp \ - miner/impl/miner-gpu.cpp \ - miner/internal/hash-rate-counter.cpp \ - miner/internal/miner-base.cpp \ - miner/internal/miner-context.cpp \ - miner/internal/miners-controller.cpp \ - miner/miner-util.cpp \ - miner/miner.cpp \ + miner.cpp \ net.cpp \ netfulfilledman.cpp \ net_processing.cpp \ @@ -685,10 +669,6 @@ endif include Makefile.vgp.include -if ENABLE_GPU -include Makefile.gpu.include -endif - if ENABLE_TESTS include Makefile.test.include endif diff --git a/src/Makefile.dht.test.include b/src/Makefile.dht.test.include index 05f37c2a15..ea231954d2 100644 --- a/src/Makefile.dht.test.include +++ b/src/Makefile.dht.test.include @@ -21,10 +21,6 @@ dht_test_data_test_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(LIBTOOL_APP_LDFLAGS) - if ENABLE_ZMQ dht_test_data_test_LDADD += $(LIBDYNAMIC_ZMQ) $(ZMQ_LIBS) endif -if ENABLE_GPU -dht_test_data_test_LDADD += $(LIBDYNAMIC_GPU_LDADD_EXTRA) -dht_test_data_test_LDFLAGS += $(LIBDYNAMIC_GPU_LDFLAGS_EXTRA) -endif nodist_dht_test_data_test_SOURCES = $(GENERATED_DHT_TEST_FILES) diff --git a/src/Makefile.gpu.include b/src/Makefile.gpu.include deleted file mode 100644 index 42f8db92ec..0000000000 --- a/src/Makefile.gpu.include +++ /dev/null @@ -1,120 +0,0 @@ -# -# Argon2 GPU miner -# - -LIBDYNAMIC_GPU = crypto/argon2gpu/libdynamic_gpu.a -LIBDYNAMIC_GPU_LDADD_EXTRA = $(LIBDYNAMIC_GPU) -LIBDYNAMIC_GPU_LDFLAGS_EXTRA = -EXTRA_LIBRARIES += $(LIBDYNAMIC_GPU) - -crypto_argon2gpu_libdynamic_gpu_a_CPPFLAGS = $(DYNAMIC_INCLUDES) -crypto_argon2gpu_libdynamic_gpu_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) -crypto_argon2gpu_libdynamic_gpu_a_SOURCES = \ - crypto/argon2gpu/blake2b.h \ - crypto/argon2gpu/common.h \ - crypto/argon2gpu/common.cpp \ - crypto/argon2gpu/blake2b.cpp - -if ENABLE_CUDA -LIBDYNAMIC_GPU_CUDA = crypto/argon2gpu/cuda/libdynamic_gpu_cuda.a -EXTRA_LIBRARIES += $(LIBDYNAMIC_GPU_CUDA) - -LIBDYNAMIC_GPU_LDADD_EXTRA += $(LIBDYNAMIC_GPU_CUDA) -LIBDYNAMIC_GPU_LDADD_EXTRA += $(CUDA_LDLIBS) -DYNAMIC_INCLUDES += $(CUDA_CFLAGS) - -NVCCFLAGS += -arch=sm_30 -w -lineinfo -Xcompiler='--std=c++11 $(CFLAGS)' - -crypto_argon2gpu_cuda_libdynamic_gpu_cuda_a_AR = $(NVCC) $(NVCCFLAGS) -lib -o -crypto_argon2gpu_cuda_libdynamic_gpu_cuda_a_LIBADD = $(CUDA_LDLIBS) -crypto_argon2gpu_cuda_libdynamic_gpu_cuda_a_CPPFLAGS = $(DYNAMIC_INCLUDES) -crypto_argon2gpu_cuda_libdynamic_gpu_cuda_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) -crypto_argon2gpu_cuda_libdynamic_gpu_cuda_a_LIBADD += \ - crypto/argon2gpu/cuda/blake2b-kernels.cu.o \ - crypto/argon2gpu/cuda/kernels.cu.o -crypto_argon2gpu_cuda_libdynamic_gpu_cuda_a_SOURCES = \ - crypto/argon2gpu/cuda/cuda-exception.h \ - crypto/argon2gpu/cuda/device.h \ - crypto/argon2gpu/cuda/global-context.h \ - crypto/argon2gpu/cuda/blake2b-kernels.h \ - crypto/argon2gpu/cuda/kernels.h \ - crypto/argon2gpu/cuda/processing-unit.h \ - crypto/argon2gpu/cuda/program-context.h \ - crypto/argon2gpu/cuda/device.cpp \ - crypto/argon2gpu/cuda/global-context.cpp \ - crypto/argon2gpu/cuda/blake2b-kernels.cu \ - crypto/argon2gpu/cuda/kernels.cu \ - crypto/argon2gpu/cuda/processing-unit.cpp \ - crypto/argon2gpu/cuda/program-context.cpp - -nvcc_FLAGS = -I. $(CUDA_CFLAGS) $(NVCCFLAGS) - -.cu.o: - $(NVCC) $(nvcc_FLAGS) -dc -o $@ -c $< - - -crypto/argon2gpu/cuda/blake2b-kernels.cu.o: crypto/argon2gpu/cuda/blake2b-kernels.cu - $(NVCC) $(nvcc_FLAGS) -dc -o $@ -c $< - -crypto/argon2gpu/cuda/blake2b-kernels.o: crypto/argon2gpu/cuda/blake2b-kernels.cu.o - $(NVCC) $(nvcc_FLAGS) -dlink -o $@ $< - -crypto/argon2gpu/cuda/kernels.cu.o: crypto/argon2gpu/cuda/kernels.cu - $(NVCC) $(nvcc_FLAGS) -dc -o $@ -c $< - -crypto/argon2gpu/cuda/kernels.o: crypto/argon2gpu/cuda/kernels.cu.o - $(NVCC) $(nvcc_FLAGS) -dlink -o $@ $< - - -else -LIBDYNAMIC_GPU_OPENCL = crypto/argon2gpu/opencl/libdynamic_gpu_opencl.a -EXTRA_LIBRARIES += $(LIBDYNAMIC_GPU_OPENCL) - -LIBDYNAMIC_GPU_LDADD_EXTRA += $(LIBDYNAMIC_GPU_OPENCL) - -if TARGET_DARWIN -LIBDYNAMIC_GPU_LDFLAGS_EXTRA += -stdlib=libc++ -framework OpenCL -L/System/Library/Frameworks/OpenCL.framework/Libraries -Wl,-x -m64 -else -LIBDYNAMIC_GPU_LDADD_EXTRA += -lOpenCL -endif - -OPENCL_KERNEL_FILES = crypto/argon2gpu/opencl/kernel.cl - -GENERATED_KERNEL_FILES = $(OPENCL_KERNEL_FILES:.cl=.cl.h) - -crypto_argon2gpu_opencl_libdynamic_gpu_opencl_a_CPPFLAGS = $(AM_CPPFLAGS) $(DYNAMIC_INCLUDES) -crypto_argon2gpu_opencl_libdynamic_gpu_opencl_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) -crypto_argon2gpu_opencl_libdynamic_gpu_opencl_a_SOURCES = \ - $(OPENCL_KERNEL_FILES) \ - crypto/argon2gpu/opencl/cl.hpp \ - crypto/argon2gpu/opencl/device.h \ - crypto/argon2gpu/opencl/global-context.h \ - crypto/argon2gpu/opencl/kernel-loader.h \ - crypto/argon2gpu/opencl/kernel-runner.h \ - crypto/argon2gpu/opencl/opencl.h \ - crypto/argon2gpu/opencl/processing-unit.h \ - crypto/argon2gpu/opencl/program-context.h \ - crypto/argon2gpu/opencl/device.cpp \ - crypto/argon2gpu/opencl/global-context.cpp \ - crypto/argon2gpu/opencl/kernel-loader.cpp \ - crypto/argon2gpu/opencl/kernel-runner.cpp \ - crypto/argon2gpu/opencl/processing-unit.cpp \ - crypto/argon2gpu/opencl/program-context.cpp - -nodist_crypto_argon2gpu_opencl_libdynamic_gpu_opencl_a_SOURCES = $(GENERATED_KERNEL_FILES) - -BUILT_SOURCES = $(GENERATED_KERNEL_FILES) -CLEANFILES += $(GENERATED_KERNEL_FILES) - -%.cl.h: %.cl - @$(MKDIR_P) $(@D) - @echo "namespace argon2gpu { namespace opencl { namespace code {" > $@ - @echo "static unsigned const char $(*F)[] = {" >> $@ - @$(HEXDUMP) -v -e '16/1 "_x%02X" "\n"' $< | $(SED) -e 's/_/\\/g; s/\\x //g; s/.*/ "&"/' >> $@ - @echo "};" >> $@ - @echo "}}}" >> $@ - @echo "Generated $@" -endif - -dynamicd_LDADD += $(LIBDYNAMIC_GPU_LDADD_EXTRA) -dynamicd_LDFLAGS += $(LIBDYNAMIC_GPU_LDFLAGS_EXTRA) diff --git a/src/Makefile.qt.include b/src/Makefile.qt.include index 26552211d2..6c57f41c7c 100644 --- a/src/Makefile.qt.include +++ b/src/Makefile.qt.include @@ -36,7 +36,6 @@ QT_FORMS_UI = \ qt/forms/editaddressdialog.ui \ qt/forms/helpmessagedialog.ui \ qt/forms/intro.ui \ - qt/forms/miningpage.ui \ qt/forms/mnemonicdialog.ui \ qt/forms/modaloverlay.ui \ qt/forms/openuridialog.ui \ @@ -80,7 +79,6 @@ QT_MOC_CPP = \ qt/moc_hashrategraphwidget.cpp \ qt/moc_macdockiconhandler.cpp \ qt/moc_macnotificationhandler.cpp \ - qt/moc_miningpage.cpp \ qt/moc_mnemonicdialog.cpp \ qt/moc_modaloverlay.cpp \ qt/moc_notificator.cpp \ @@ -157,7 +155,6 @@ DYNAMIC_QT_H = \ qt/intro.h \ qt/macdockiconhandler.h \ qt/macnotificationhandler.h \ - qt/miningpage.h \ qt/mnemonicdialog.h \ qt/modaloverlay.h \ qt/networkstyle.h \ @@ -315,7 +312,6 @@ DYNAMIC_QT_CPP += \ qt/dynodelist.cpp \ qt/editaddressdialog.cpp \ qt/hashrategraphwidget.cpp \ - qt/miningpage.cpp \ qt/mnemonicdialog.cpp \ qt/openuridialog.cpp \ qt/overviewpage.cpp \ @@ -399,10 +395,6 @@ if ENABLE_WALLET qt_dynamic_qt_LDADD += $(LIBDYNAMIC_WALLET) endif qt_dynamic_qt_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(QT_LDFLAGS) $(LIBTOOL_APP_LDFLAGS) -if ENABLE_GPU -qt_dynamic_qt_LDADD += $(LIBDYNAMIC_GPU_LDADD_EXTRA) -qt_dynamic_qt_LDFLAGS += $(LIBDYNAMIC_GPU_LDFLAGS_EXTRA) -endif if ENABLE_ZMQ qt_dynamic_qt_LDADD += $(LIBDYNAMIC_ZMQ) $(ZMQ_LIBS) endif diff --git a/src/Makefile.qttest.include b/src/Makefile.qttest.include index 88a4787682..f6d013af53 100644 --- a/src/Makefile.qttest.include +++ b/src/Makefile.qttest.include @@ -33,10 +33,6 @@ if ENABLE_ZMQ qt_test_test_dynamic_qt_LDADD += $(LIBDYNAMIC_ZMQ) $(ZMQ_LIBS) endif qt_test_test_dynamic_qt_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(QT_LDFLAGS) $(LIBTOOL_APP_LDFLAGS) -if ENABLE_GPU -qt_test_test_dynamic_qt_LDADD += $(LIBDYNAMIC_GPU_LDADD_EXTRA) -qt_test_test_dynamic_qt_LDFLAGS += $(LIBDYNAMIC_GPU_LDFLAGS_EXTRA) -endif qt_test_test_dynamic_qt_LDADD += $(LIBDYNAMIC_CLI) $(LIBDYNAMIC_COMMON) $(LIBDYNAMIC_UTIL) $(LIBDYNAMIC_CONSENSUS) $(LIBVGP) $(LIBDYNAMIC_CRYPTO) $(LIBUNIVALUE) $(LIBLEVELDB) \ $(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(QT_DBUS_LIBS) $(QT_TEST_LIBS) $(QT_LIBS) \ diff --git a/src/Makefile.test.include b/src/Makefile.test.include index 2499891dec..7284d05a88 100644 --- a/src/Makefile.test.include +++ b/src/Makefile.test.include @@ -129,10 +129,6 @@ test_test_dynamic_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(LIBTOOL_APP_LDFLAGS) -s if ENABLE_ZMQ test_test_dynamic_LDADD += $(ZMQ_LIBS) endif -if ENABLE_GPU -test_test_dynamic_LDADD += $(LIBDYNAMIC_GPU_LDADD_EXTRA) -test_test_dynamic_LDFLAGS += $(LIBDYNAMIC_GPU_LDFLAGS_EXTRA) -endif nodist_test_test_dynamic_SOURCES = $(GENERATED_TEST_FILES) diff --git a/src/init.cpp b/src/init.cpp index f92eb86fb4..add03a1513 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -40,9 +40,8 @@ #include "httpserver.h" #include "instantsend.h" #include "key.h" +#include "miner.h" #include "messagesigner.h" -#include "miner/internal/miners-controller.h" -#include "miner/miner.h" #include "net.h" #include "net_processing.h" #include "netfulfilledman.h" @@ -304,7 +303,6 @@ void PrepareShutdown() if (pwalletMain) pwalletMain->Flush(false); #endif - ShutdownMiners(); MapPort(false); UnregisterValidationInterface(peerLogic.get()); peerLogic.reset(); @@ -2120,14 +2118,6 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler) if (!connman.Start(scheduler, strNodeError, connOptions)) return InitError(strNodeError); - // Generate coins in the background - if (GetBoolArg("-gen", DEFAULT_GENERATE)) { - InitMiners(chainparams, connman); - SetCPUMinerThreads(GetArg("-genproclimit-cpu", DEFAULT_GENERATE_THREADS_CPU)); - SetGPUMinerThreads(GetArg("-genproclimit-gpu", DEFAULT_GENERATE_THREADS_GPU)); - StartMiners(); - } - // Start the DHT Torrent networks in the background //const bool fMultiSessions = GetArg("-multidhtsessions", false); //StartTorrentDHTNetwork(fMultiSessions, chainparams, connman); diff --git a/src/miner/miner-util.cpp b/src/miner.cpp similarity index 99% rename from src/miner/miner-util.cpp rename to src/miner.cpp index 252f8b7185..73ae5bd116 100644 --- a/src/miner/miner-util.cpp +++ b/src/miner.cpp @@ -5,7 +5,7 @@ // Distributed under the MIT/X11 software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. -#include "miner/miner-util.h" +#include "miner.h" #include "consensus/consensus.h" #include "consensus/merkle.h" #include "consensus/validation.h" diff --git a/src/miner/miner-util.h b/src/miner.h similarity index 100% rename from src/miner/miner-util.h rename to src/miner.h diff --git a/src/qt/dynamicgui.cpp b/src/qt/dynamicgui.cpp index f348e06410..91d3672fac 100644 --- a/src/qt/dynamicgui.cpp +++ b/src/qt/dynamicgui.cpp @@ -104,7 +104,6 @@ DynamicGUI::DynamicGUI(const PlatformStyle* _platformStyle, const NetworkStyle* receiveCoinsMenuAction(0), historyAction(0), dynodeAction(0), - miningAction(0), bdapAction(0), quitAction(0), usedSendingAddressesAction(0), @@ -359,17 +358,6 @@ void DynamicGUI::createActions() #endif tabGroup->addAction(dynodeAction); - miningAction = new QAction(QIcon(":/icons/" + theme + "/tx_mined"), tr("&Mining"), this); - miningAction->setStatusTip(tr("Mine Dynamic(DYN)")); - miningAction->setToolTip(miningAction->statusTip()); - miningAction->setCheckable(true); -#ifdef Q_OS_MAC - miningAction->setShortcut(QKeySequence(Qt::CTRL + Qt::Key_6)); -#else - miningAction->setShortcut(QKeySequence(Qt::ALT + Qt::Key_6)); -#endif - tabGroup->addAction(miningAction); - bdapAction = new QAction(QIcon(":/icons/" + theme + "/bdap"), tr("&BDAP"), this); bdapAction->setStatusTip(tr("BDAP")); bdapAction->setToolTip(bdapAction->statusTip()); @@ -397,8 +385,6 @@ void DynamicGUI::createActions() connect(historyAction, SIGNAL(triggered()), this, SLOT(gotoHistoryPage())); connect(dynodeAction, SIGNAL(triggered()), this, SLOT(showNormalIfMinimized())); connect(dynodeAction, SIGNAL(triggered()), this, SLOT(gotoDynodePage())); - connect(miningAction, SIGNAL(triggered()), this, SLOT(showNormalIfMinimized())); - connect(miningAction, SIGNAL(triggered()), this, SLOT(gotoMiningPage())); connect(bdapAction, SIGNAL(triggered()), this, SLOT(showNormalIfMinimized())); connect(bdapAction, SIGNAL(triggered()), this, SLOT(gotoBdapPage())); @@ -595,7 +581,6 @@ void DynamicGUI::createToolBars() toolbar->addAction(receiveCoinsAction); toolbar->addAction(historyAction); toolbar->addAction(dynodeAction); - toolbar->addAction(miningAction); toolbar->addAction(bdapAction); /** Create additional container for toolbar and walletFrame and make it the central widget. @@ -738,7 +723,6 @@ void DynamicGUI::setWalletActionsEnabled(bool enabled) receiveCoinsMenuAction->setEnabled(enabled); historyAction->setEnabled(enabled); dynodeAction->setEnabled(enabled); - miningAction->setEnabled(enabled); bdapAction->setEnabled(enabled); encryptWalletAction->setEnabled(enabled); backupWalletAction->setEnabled(enabled); @@ -777,7 +761,6 @@ void DynamicGUI::createIconMenu(QMenu* pmenu) pmenu->addAction(receiveCoinsAction); pmenu->addAction(historyAction); pmenu->addAction(dynodeAction); - pmenu->addAction(miningAction); pmenu->addAction(bdapAction); pmenu->addSeparator(); pmenu->addAction(optionsAction); @@ -943,13 +926,6 @@ void DynamicGUI::gotoDynodePage() walletFrame->gotoDynodePage(); } -void DynamicGUI::gotoMiningPage() -{ - miningAction->setChecked(true); - if (walletFrame) - walletFrame->gotoMiningPage(); -} - void DynamicGUI::gotoBdapPage() { bdapAction->setChecked(true); diff --git a/src/qt/dynamicgui.h b/src/qt/dynamicgui.h index 06f0d65e91..dea740f60f 100644 --- a/src/qt/dynamicgui.h +++ b/src/qt/dynamicgui.h @@ -21,7 +21,6 @@ class ClientModel; class DynodeList; class HelpMessageDialog; -class MiningPage; class BdapPage; class BdapAccountTableModel; class BdapAddUserDialog; @@ -245,8 +244,6 @@ private Q_SLOTS: void gotoHistoryPage(); /** Switch to Dynode page */ void gotoDynodePage(); - /** Switch to mining page */ - void gotoMiningPage(); /** Switch to BDAP page */ void gotoBdapPage(); /** Show Sign/Verify Message dialog and switch to sign message tab */ diff --git a/src/qt/guiutil.cpp b/src/qt/guiutil.cpp index 1594ef1162..2c9af7d5d8 100644 --- a/src/qt/guiutil.cpp +++ b/src/qt/guiutil.cpp @@ -24,8 +24,6 @@ #include "util.h" #include "validation.h" // For minRelayTxFee -#include "miner/impl/miner-gpu.h" - #ifdef WIN32 #ifdef _WIN32_WINNT #undef _WIN32_WINNT @@ -1062,19 +1060,6 @@ int CPUMaxThreads() return nUseThreads; } -#ifdef ENABLE_GPU -int GPUMaxThreads() -{ - int nThreads = GPUMiner::TotalDevices(); - - int nUseThreads = GetArg("-genproclimit-gpu", -1); - if (nUseThreads < 0) { - nUseThreads = nThreads; - } - return nUseThreads; -} -#endif - QString FormatHashRate(qint64 n) { if (n == 0) diff --git a/src/qt/guiutil.h b/src/qt/guiutil.h index f431cd50e9..431e3e770a 100644 --- a/src/qt/guiutil.h +++ b/src/qt/guiutil.h @@ -9,7 +9,7 @@ #define DYNAMIC_QT_GUIUTIL_H #include "amount.h" -#include "miner/miner.h" +#include "miner.h" #include #include diff --git a/src/qt/hashrategraphwidget.cpp b/src/qt/hashrategraphwidget.cpp index 7b3fc2d2aa..e26f0df83a 100644 --- a/src/qt/hashrategraphwidget.cpp +++ b/src/qt/hashrategraphwidget.cpp @@ -51,18 +51,6 @@ void HashRateGraphWidget::initGraph(QPainter& painter) } } -int64_t HashRateGraphWidget::getHashRate() -{ - switch (graphType) { - case GraphType::MINER_CPU_HASHRATE: - return GetCPUHashRate(); - case GraphType::MINER_GPU_HASHRATE: - return GetGPUHashRate(); - default: - return GetHashRate(); - } -} - void HashRateGraphWidget::drawHashRate(QPainter& painter) { QPainterPath path; @@ -113,9 +101,7 @@ void HashRateGraphWidget::truncateSampleQueue() void HashRateGraphWidget::updateHashRateGraph() { int64_t iCurrentHashRate = 0; - if (graphType == GraphType::MINER_CPU_HASHRATE || graphType == GraphType::MINER_GPU_HASHRATE) { - iCurrentHashRate = getHashRate(); - } else if (graphType == GraphType::NETWORK_HASHRATE) { + if (graphType == GraphType::NETWORK_HASHRATE) { iCurrentHashRate = GUIUtil::GetNetworkHashPS(120, -1); } diff --git a/src/qt/hashrategraphwidget.h b/src/qt/hashrategraphwidget.h index 1982ae6308..abb981e395 100644 --- a/src/qt/hashrategraphwidget.h +++ b/src/qt/hashrategraphwidget.h @@ -51,7 +51,6 @@ public Q_SLOTS: void initGraph(QPainter& painter); void drawHashRate(QPainter& painter); void truncateSampleQueue(); - int64_t getHashRate(); unsigned int iDesiredSamples; int64_t iMaxHashRate; diff --git a/src/qt/walletframe.cpp b/src/qt/walletframe.cpp index 90e8727c1e..20176ae8f7 100644 --- a/src/qt/walletframe.cpp +++ b/src/qt/walletframe.cpp @@ -131,13 +131,6 @@ void WalletFrame::gotoDynodePage() i.value()->gotoDynodePage(); } -void WalletFrame::gotoMiningPage() -{ - QMap::const_iterator i; - for (i = mapWalletViews.constBegin(); i != mapWalletViews.constEnd(); ++i) - i.value()->gotoMiningPage(); -} - void WalletFrame::gotoBdapPage() { QMap::const_iterator i; diff --git a/src/qt/walletframe.h b/src/qt/walletframe.h index e4036ce9cc..9b5bd02af8 100644 --- a/src/qt/walletframe.h +++ b/src/qt/walletframe.h @@ -64,8 +64,6 @@ public Q_SLOTS: void gotoHistoryPage(); /** Switch to Dynode page */ void gotoDynodePage(); - /** Switch to mining page */ - void gotoMiningPage(); /** Switch to bdap page */ void gotoBdapPage(); /** Switch to receive coins page */ diff --git a/src/qt/walletview.cpp b/src/qt/walletview.cpp index 20b09a77af..8df2e93f49 100644 --- a/src/qt/walletview.cpp +++ b/src/qt/walletview.cpp @@ -13,7 +13,6 @@ #include "clientmodel.h" #include "dynamicgui.h" #include "guiutil.h" -#include "miningpage.h" #include "optionsmodel.h" #include "overviewpage.h" #include "platformstyle.h" @@ -45,13 +44,8 @@ WalletView::WalletView(const PlatformStyle* _platformStyle, QWidget* parent) : Q { // Create tabs overviewPage = new OverviewPage(platformStyle); - - sendCoinsPage = new SendCoinsDialog(platformStyle); - receiveCoinsPage = new ReceiveCoinsDialog(platformStyle); - - miningPage = new MiningPage(platformStyle); bdapPage = new BdapPage(platformStyle); usedSendingAddressesPage = new AddressBookPage(platformStyle, AddressBookPage::ForEditing, AddressBookPage::SendingTab, this); @@ -98,7 +92,6 @@ WalletView::WalletView(const PlatformStyle* _platformStyle, QWidget* parent) : Q if (settings.value("fShowDynodesTab").toBool()) { addWidget(dynodeListPage); } - addWidget(miningPage); addWidget(bdapPage); // Clicking on a transaction on the overview pre-selects the transaction on the transaction history page @@ -175,7 +168,6 @@ void WalletView::setWalletModel(WalletModel* _walletModel) if (settings.value("fShowDynodesTab").toBool()) { dynodeListPage->setWalletModel(_walletModel); } - miningPage->setModel(_walletModel); bdapPage->setModel(_walletModel); if (_walletModel) { @@ -262,11 +254,6 @@ void WalletView::gotoDynodePage() } } -void WalletView::gotoMiningPage() -{ - setCurrentWidget(miningPage); -} - void WalletView::gotoBdapPage() { setCurrentWidget(bdapPage); diff --git a/src/qt/walletview.h b/src/qt/walletview.h index 1070731eb4..8cd649a065 100644 --- a/src/qt/walletview.h +++ b/src/qt/walletview.h @@ -16,7 +16,6 @@ class DynamicGUI; class ClientModel; -class MiningPage; class BdapPage; class OverviewPage; class PlatformStyle; @@ -74,7 +73,6 @@ class WalletView : public QStackedWidget QWidget* transactionsPage; TransactionView* transactionView; DynodeList* dynodeListPage; - MiningPage* miningPage; BdapPage* bdapPage; QProgressDialog* progressDialog; @@ -92,8 +90,6 @@ public Q_SLOTS: void gotoHistoryPage(); /** Switch to Dynode page */ void gotoDynodePage(); - /** Switch to mining page */ - void gotoMiningPage(); /** Switch to bdap page */ void gotoBdapPage(); diff --git a/src/rpc/mining.cpp b/src/rpc/mining.cpp index 3a5421b543..a893969574 100644 --- a/src/rpc/mining.cpp +++ b/src/rpc/mining.cpp @@ -18,7 +18,6 @@ #include "fluid/fluiddb.h" #include "fluid/fluidmint.h" #include "init.h" -#include "miner/miner.h" #include "net.h" #include "pow.h" #include "rpc/server.h" @@ -31,6 +30,7 @@ #ifdef ENABLE_WALLET #include "wallet/wallet.h" #endif +#include "miner.h" #include "dynode-payments.h" #include "dynode-sync.h" @@ -301,14 +301,9 @@ UniValue setgenerate(const JSONRPCRequest& request) "See the getgenerate call for the current setting.\n" "\nArguments:\n" "1. generate (boolean, required) Set to true to turn on generation, false to turn off.\n" - "2. genproclimit-cpu (numeric, optional) Set the CPU thread limit for when generation is on. Can be -1 for unlimited.\n" - "3. genproclimit-gpu (numeric, optional) Set the GPU thread limit for when generation is on. Can be -1 for unlimited.\n" "\nExamples:\n" "\nSet the generation on with a limit of one CPU processor\n" + HelpExampleCli("setgenerate", "true 1") + -#if ENABLE_GPU - "\nSet the generation on with a limit of one GPU\n" + HelpExampleCli("setgenerate", "true 0 1") + -#endif "\nCheck the setting\n" + HelpExampleCli("getgenerate", "") + "\nTurn off generation\n" + HelpExampleCli("setgenerate", "false") + "\nUsing json rpc\n" + HelpExampleRpc("setgenerate", "true, 1")); @@ -320,86 +315,13 @@ UniValue setgenerate(const JSONRPCRequest& request) if (request.params.size() > 0) fGenerate = request.params[0].get_bool(); - int nGenProcLimitCPU = GetArg("-genproclimit-cpu", DEFAULT_GENERATE_THREADS_CPU); - if (request.params.size() > 1) - nGenProcLimitCPU = request.params[1].get_int(); - - int nGenProcLimitGPU = GetArg("-genproclimit-gpu", DEFAULT_GENERATE_THREADS_GPU); - if (request.params.size() > 2) - nGenProcLimitGPU = request.params[2].get_int(); - - if (nGenProcLimitCPU == 0 && nGenProcLimitGPU == 0) - fGenerate = false; - ForceSetArg("-gen", fGenerate ? "1" : "0"); - ForceSetArg("-genproclimit-cpu", nGenProcLimitCPU); - ForceSetArg("-genproclimit-gpu", nGenProcLimitGPU); - if (fGenerate) { - #ifdef ENABLE_WALLET - //Check to see if wallet needs upgrading - if(pwalletMain->WalletNeedsUpgrading()) - throw JSONRPCError(RPC_WALLET_NEEDS_UPGRADING, "Error: Your wallet has not been fully upgraded to version 2.4. Please unlock your wallet to continue."); - #endif //ENABLE_WALLET - - InitMiners(Params(), *g_connman); - SetCPUMinerThreads(nGenProcLimitCPU); - SetGPUMinerThreads(nGenProcLimitGPU); - StartMiners(); } else { - ShutdownMiners(); } - return NullUniValue; } -UniValue gethashespersec(const JSONRPCRequest& request) -{ - if (request.fHelp || request.params.size() != 0) - throw std::runtime_error( - "gethashespersec\n" - "\nReturns a recent hashes per second performance measurement while generating.\n" - "See the getgenerate and setgenerate calls to turn generation on and off.\n" - "\nResult:\n" - "n (numeric) The recent hashes per second when generation is on (will return 0 if generation is off)\n" - "\nExamples:\n" + - HelpExampleCli("gethashespersec", "") + HelpExampleRpc("gethashespersec", "")); - - return GetHashRate(); -} - - -UniValue getcpuhashespersec(const JSONRPCRequest& request) -{ - if (request.fHelp || request.params.size() != 0) - throw std::runtime_error( - "getcpuhashespersec\n" - "\nReturns a recent CPU hashes per second performance measurement while generating.\n" - "See the getgenerate and setgenerate calls to turn generation on and off.\n" - "\nResult:\n" - "n (numeric) The recent CPU hashes per second when generation is on (will return 0 if generation is off)\n" - "\nExamples:\n" + - HelpExampleCli("getcpuhashespersec", "") + HelpExampleRpc("getcpuhashespersec", "")); - - return GetCPUHashRate(); -} - - -UniValue getgpuhashespersec(const JSONRPCRequest& request) -{ - if (request.fHelp || request.params.size() != 0) - throw std::runtime_error( - "getgpuhashespersec\n" - "\nReturns a recent GPU hashes per second performance measurement while generating.\n" - "See the getgenerate and setgenerate calls to turn generation on and off.\n" - "\nResult:\n" - "n (numeric) The recent GPU hashes per second when generation is on (will return 0 if generation is off)\n" - "\nExamples:\n" + - HelpExampleCli("getgpuhashespersec", "") + HelpExampleRpc("getgpuhashespersec", "")); - - return GetGPUHashRate(); -} - UniValue getmininginfo(const JSONRPCRequest& request) { if (request.fHelp || request.params.size() != 0) @@ -417,9 +339,6 @@ UniValue getmininginfo(const JSONRPCRequest& request) " \"genproclimit\": n (numeric) The processor limit for generation. -1 if no generation. (see getgenerate or setgenerate calls)\n" " \"pooledtx\": n (numeric) The size of the mempool\n" " \"chain\": \"xxxx\", (string) current network name as defined in BIP70 (main, test, regtest)\n" - " \"hashespersec\": n (numeric) The recent hashes per second when generation is on (will return 0 if generation is off)\n" - " \"cpuhashespersec\": n (numeric) The recent CPU hashes per second when generation is on (will return 0 if generation is off)\n" - " \"gpuhashespersec\": n (numeric) The recent GPU hashes per second when generation is on (will return 0 if generation is off)\n" "}\n" "\nExamples:\n" + HelpExampleCli("getmininginfo", "") + HelpExampleRpc("getmininginfo", "")); @@ -439,9 +358,6 @@ UniValue getmininginfo(const JSONRPCRequest& request) obj.push_back(Pair("pooledtx", (uint64_t)mempool.size())); obj.push_back(Pair("chain", Params().NetworkIDString())); obj.push_back(Pair("generate", getgenerate(request))); - obj.push_back(Pair("hashespersec", gethashespersec(request))); - obj.push_back(Pair("cpuhashespersec", getcpuhashespersec(request))); - obj.push_back(Pair("gpuhashespersec", getgpuhashespersec(request))); return obj; } @@ -1168,9 +1084,6 @@ static const CRPCCommand commands[] = {"generating", "setgenerate", &setgenerate, true, {"generate", "genproclimit-cpu", "genproclimit-gpu"}}, {"generating", "generate", &generate, true, {"nblocks", "maxtries"}}, {"generating", "generatetoaddress", &generatetoaddress, true, {"nblocks", "address", "maxtries"}}, - {"generating", "gethashespersec", &gethashespersec, true, {}}, - {"generating", "getcpuhashespersec", &getcpuhashespersec, true, {}}, - {"generating", "getgpuhashespersec", &getgpuhashespersec, true, {}}, {"util", "estimatefee", &estimatefee, true, {"nblocks"}}, {"util", "estimatepriority", &estimatepriority, true, {"nblocks"}}, diff --git a/src/test/miner_tests.cpp b/src/test/miner_tests.cpp index 69c4191efd..8315eb8240 100644 --- a/src/test/miner_tests.cpp +++ b/src/test/miner_tests.cpp @@ -10,7 +10,7 @@ #include "consensus/validation.h" #include "validation.h" #include "dynode-payments.h" -#include "miner/miner.h" +#include "miner.h" #include "pubkey.h" #include "script/standard.h" #include "txmempool.h" @@ -18,8 +18,6 @@ #include "util.h" #include "utilstrencodings.h" -#include "miner/impl/miner-gpu.h" - #include "test/test_dynamic.h" #include @@ -77,43 +75,6 @@ bool TestSequenceLocks(const CTransaction &tx, int flags) return CheckSequenceLocks(tx, flags); } -#ifdef ENABLE_GPU -BOOST_AUTO_TEST_CASE(ScanNoncesGPU_check) -{ - gpu::Context global; - auto& devices = global.getAllDevices(); - auto& device = devices[0]; - gpu::ProgramContext context(&global, {device}, argon2gpu::ARGON2_D, argon2gpu::ARGON2_VERSION_10); - gpu::Params params((std::size_t)OUTPUT_BYTES, 2, 500, 8); - gpu::ProcessingUnit processingUnit(&context, ¶ms, &device, 256, false, false); - - CBlock block; - CDataStream stream(ParseHex("0100000075616236cc2126035fadb38deb65b9102cc2c41c09cdf29fc051906800000000fe7d5e12ef0ff901f6050211249919b1c0653771832b3a80c66cea42847f0ae1d4d26e49ffff001f00f0a4410401000000010000000000000000000000000000000000000000000000000000000000000000ffffffff0804ffff001d029105ffffffff0100f2052a010000004341046d8709a041d34357697dfcb30a9d05900a6294078012bf3bb09c6f9b525f1d16d5503d7905db1ada9501446ea00728668fc5719aa80be2fdfc8a858a4dbdd4fbac00000000010000000255605dc6f5c3dc148b6da58442b0b2cd422be385eab2ebea4119ee9c268d28350000000049483045022100aa46504baa86df8a33b1192b1b9367b4d729dc41e389f2c04f3e5c7f0559aae702205e82253a54bf5c4f65b7428551554b2045167d6d206dfe6a2e198127d3f7df1501ffffffff55605dc6f5c3dc148b6da58442b0b2cd422be385eab2ebea4119ee9c268d2835010000004847304402202329484c35fa9d6bb32a55a70c0982f606ce0e3634b69006138683bcd12cbb6602200c28feb1e2555c3210f1dddb299738b4ff8bbe9667b68cb8764b5ac17b7adf0001ffffffff0200e1f505000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e8d9dfdc0cac00180d8f000000004341044a656f065871a353f216ca26cef8dde2f03e8c16202d2e8ad769f02032cb86a5eb5e56842e92e19141d60a01928f8dd2c875a390f67c1f6c94cfc617c0ea45afac0000000001000000025f9a06d3acdceb56be1bfeaa3e8a25e62d182fa24fefe899d1c17f1dad4c2028000000004847304402205d6058484157235b06028c30736c15613a28bdb768ee628094ca8b0030d4d6eb0220328789c9a2ec27ddaec0ad5ef58efded42e6ea17c2e1ce838f3d6913f5e95db601ffffffff5f9a06d3acdceb56be1bfeaa3e8a25e62d182fa24fefe899d1c17f1dad4c2028010000004a493046022100c45af050d3cea806cedd0ab22520c53ebe63b987b8954146cdca42487b84bdd6022100b9b027716a6b59e640da50a864d6dd8a0ef24c76ce62391fa3eabaf4d2886d2d01ffffffff0200e1f505000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e8d9dfdc0cac00180d8f000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e8d9dfdc0cac000000000100000002e2274e5fea1bf29d963914bd301aa63b64daaf8a3e88f119b5046ca5738a0f6b0000000048473044022016e7a727a061ea2254a6c358376aaa617ac537eb836c77d646ebda4c748aac8b0220192ce28bf9f2c06a6467e6531e27648d2b3e2e2bae85159c9242939840295ba501ffffffffe2274e5fea1bf29d963914bd301aa63b64daaf8a3e88f119b5046ca5738a0f6b010000004a493046022100b7a1a755588d4190118936e15cd217d133b0e4a53c3c15924010d5648d8925c9022100aaef031874db2114f2d869ac2de4ae53908fbfea5b2b1862e181626bb9005c9f01ffffffff0200e1f505000000004341044a656f065871a353f216ca26cef8dde2f03e8c16202d2e8ad769f02032cb86a5eb5e56842e92e19141d60a01928f8dd2c875a390f67c1f6c94cfc617c0ea45afac00180d8f000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e8d9dfdc0cac00000000"), SER_NETWORK, PROTOCOL_VERSION); - stream >> block; - - arith_uint256 _hash_target = arith_uint256().SetCompact(block.nBits); - const std::uint64_t device_target = (ArithToUint256(_hash_target).GetUint64(3)); - - const void* input = BEGIN(block.nVersion); - std::uint32_t result_nonce; - std::uint32_t start_nonce = 0; - - while ( true ){ - result_nonce = processingUnit.scanNonces(input, start_nonce, device_target); - if ( result_nonce < std::numeric_limits::max() || start_nonce == std::numeric_limits::max()) - break; - start_nonce += 256; - } - - block.nNonce = result_nonce; - BOOST_TEST_MESSAGE("Found Nonce : " << block.nNonce); - uint256 cpuHash = block.GetHash(); - BOOST_CHECK(UintToArith256(cpuHash) <= _hash_target); - -} -#endif //ENABLE_GPU - - // NOTE: These tests rely on CreateNewBlock doing its own self-validation! BOOST_AUTO_TEST_CASE(CreateNewBlock_validity) { diff --git a/src/test/test_dynamic.cpp b/src/test/test_dynamic.cpp index c0cf112b7a..d54821eeeb 100644 --- a/src/test/test_dynamic.cpp +++ b/src/test/test_dynamic.cpp @@ -11,8 +11,8 @@ #include "consensus/consensus.h" #include "consensus/validation.h" #include "key.h" +#include "miner.h" #include "validation.h" -#include "miner/miner.h" #include "net_processing.h" #include "pubkey.h" #include "random.h" diff --git a/src/test/txvalidationcache_tests.cpp b/src/test/txvalidationcache_tests.cpp index c281581185..b7594ee88d 100644 --- a/src/test/txvalidationcache_tests.cpp +++ b/src/test/txvalidationcache_tests.cpp @@ -5,7 +5,7 @@ #include "consensus/validation.h" #include "key.h" #include "validation.h" -#include "miner/miner.h" +#include "miner.h" #include "pubkey.h" #include "txmempool.h" #include "random.h" From ab10d0b7b624850cc4e294362bb5075b15fbca88 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <6098974-kittywhiskers@users.noreply.gitlab.com> Date: Sun, 16 May 2021 22:20:34 +0530 Subject: [PATCH 2/4] core: cleanup unused opencl/cuda logic --- src/crypto/argon2gpu/blake2b.cpp | 228 - src/crypto/argon2gpu/blake2b.h | 57 - src/crypto/argon2gpu/common.cpp | 223 - src/crypto/argon2gpu/common.h | 93 - src/crypto/argon2gpu/cuda/blake2b-kernels.cu | 394 - src/crypto/argon2gpu/cuda/blake2b-kernels.h | 149 - src/crypto/argon2gpu/cuda/cuda-exception.h | 56 - src/crypto/argon2gpu/cuda/device.cpp | 50 - src/crypto/argon2gpu/cuda/device.h | 61 - src/crypto/argon2gpu/cuda/global-context.cpp | 38 - src/crypto/argon2gpu/cuda/global-context.h | 46 - src/crypto/argon2gpu/cuda/kernels.cu | 942 -- src/crypto/argon2gpu/cuda/kernels.h | 83 - src/crypto/argon2gpu/cuda/processing-unit.cpp | 63 - src/crypto/argon2gpu/cuda/processing-unit.h | 63 - src/crypto/argon2gpu/cuda/program-context.cpp | 37 - src/crypto/argon2gpu/cuda/program-context.h | 54 - src/crypto/argon2gpu/opencl/cl.hpp | 13089 ---------------- src/crypto/argon2gpu/opencl/device.cpp | 239 - src/crypto/argon2gpu/opencl/device.h | 61 - .../argon2gpu/opencl/global-context.cpp | 46 - src/crypto/argon2gpu/opencl/global-context.h | 44 - src/crypto/argon2gpu/opencl/kernel-loader.cpp | 58 - src/crypto/argon2gpu/opencl/kernel-loader.h | 42 - src/crypto/argon2gpu/opencl/kernel-runner.cpp | 122 - src/crypto/argon2gpu/opencl/kernel-runner.h | 85 - src/crypto/argon2gpu/opencl/kernel.cl | 1766 --- src/crypto/argon2gpu/opencl/opencl.h | 37 - .../argon2gpu/opencl/processing-unit.cpp | 51 - src/crypto/argon2gpu/opencl/processing-unit.h | 63 - .../argon2gpu/opencl/program-context.cpp | 41 - src/crypto/argon2gpu/opencl/program-context.h | 60 - 32 files changed, 18441 deletions(-) delete mode 100644 src/crypto/argon2gpu/blake2b.cpp delete mode 100644 src/crypto/argon2gpu/blake2b.h delete mode 100644 src/crypto/argon2gpu/common.cpp delete mode 100644 src/crypto/argon2gpu/common.h delete mode 100644 src/crypto/argon2gpu/cuda/blake2b-kernels.cu delete mode 100644 src/crypto/argon2gpu/cuda/blake2b-kernels.h delete mode 100644 src/crypto/argon2gpu/cuda/cuda-exception.h delete mode 100644 src/crypto/argon2gpu/cuda/device.cpp delete mode 100644 src/crypto/argon2gpu/cuda/device.h delete mode 100644 src/crypto/argon2gpu/cuda/global-context.cpp delete mode 100644 src/crypto/argon2gpu/cuda/global-context.h delete mode 100644 src/crypto/argon2gpu/cuda/kernels.cu delete mode 100644 src/crypto/argon2gpu/cuda/kernels.h delete mode 100644 src/crypto/argon2gpu/cuda/processing-unit.cpp delete mode 100644 src/crypto/argon2gpu/cuda/processing-unit.h delete mode 100644 src/crypto/argon2gpu/cuda/program-context.cpp delete mode 100644 src/crypto/argon2gpu/cuda/program-context.h delete mode 100644 src/crypto/argon2gpu/opencl/cl.hpp delete mode 100644 src/crypto/argon2gpu/opencl/device.cpp delete mode 100644 src/crypto/argon2gpu/opencl/device.h delete mode 100644 src/crypto/argon2gpu/opencl/global-context.cpp delete mode 100644 src/crypto/argon2gpu/opencl/global-context.h delete mode 100644 src/crypto/argon2gpu/opencl/kernel-loader.cpp delete mode 100644 src/crypto/argon2gpu/opencl/kernel-loader.h delete mode 100644 src/crypto/argon2gpu/opencl/kernel-runner.cpp delete mode 100644 src/crypto/argon2gpu/opencl/kernel-runner.h delete mode 100644 src/crypto/argon2gpu/opencl/kernel.cl delete mode 100644 src/crypto/argon2gpu/opencl/opencl.h delete mode 100644 src/crypto/argon2gpu/opencl/processing-unit.cpp delete mode 100644 src/crypto/argon2gpu/opencl/processing-unit.h delete mode 100644 src/crypto/argon2gpu/opencl/program-context.cpp delete mode 100644 src/crypto/argon2gpu/opencl/program-context.h diff --git a/src/crypto/argon2gpu/blake2b.cpp b/src/crypto/argon2gpu/blake2b.cpp deleted file mode 100644 index 4d17d560f9..0000000000 --- a/src/crypto/argon2gpu/blake2b.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/blake2b.h" - -#include - -namespace argon2gpu -{ -static const std::uint64_t blake2b_IV[8] = { - UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), - UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), - UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), - UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)}; - -static const unsigned int blake2b_sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, -}; - -#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#define G(m, r, i, a, b, c, d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define ROUND(m, v, r) \ - do { \ - G(m, r, 0, v[0], v[4], v[8], v[12]); \ - G(m, r, 1, v[1], v[5], v[9], v[13]); \ - G(m, r, 2, v[2], v[6], v[10], v[14]); \ - G(m, r, 3, v[3], v[7], v[11], v[15]); \ - G(m, r, 4, v[0], v[5], v[10], v[15]); \ - G(m, r, 5, v[1], v[6], v[11], v[12]); \ - G(m, r, 6, v[2], v[7], v[8], v[13]); \ - G(m, r, 7, v[3], v[4], v[9], v[14]); \ - } while ((void)0, 0) - -static std::uint64_t load64(const void* src) -{ - auto in = static_cast(src); - std::uint64_t res = *in++; - res |= static_cast(*in++) << 8; - res |= static_cast(*in++) << 16; - res |= static_cast(*in++) << 24; - res |= static_cast(*in++) << 32; - res |= static_cast(*in++) << 40; - res |= static_cast(*in++) << 48; - res |= static_cast(*in++) << 56; - return res; -} - -static void store64(void* dst, std::uint64_t v) -{ - auto out = static_cast(dst); - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); -} - -void Blake2b::init(std::size_t outlen) -{ - t[1] = t[0] = 0; - bufLen = 0; - - std::memcpy(h, blake2b_IV, sizeof(h)); - - h[0] ^= static_cast(outlen) | - (UINT64_C(1) << 16) | (UINT64_C(1) << 24); -} - -void Blake2b::compress(const void* block, std::uint64_t f0) -{ - std::uint64_t m[16]; - std::uint64_t v[16]; - - auto in = static_cast(block); - - m[0] = load64(in + 0); - m[1] = load64(in + 1); - m[2] = load64(in + 2); - m[3] = load64(in + 3); - m[4] = load64(in + 4); - m[5] = load64(in + 5); - m[6] = load64(in + 6); - m[7] = load64(in + 7); - m[8] = load64(in + 8); - m[9] = load64(in + 9); - m[10] = load64(in + 10); - m[11] = load64(in + 11); - m[12] = load64(in + 12); - m[13] = load64(in + 13); - m[14] = load64(in + 14); - m[15] = load64(in + 15); - - v[0] = h[0]; - v[1] = h[1]; - v[2] = h[2]; - v[3] = h[3]; - v[4] = h[4]; - v[5] = h[5]; - v[6] = h[6]; - v[7] = h[7]; - v[8] = blake2b_IV[0]; - v[9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ t[0]; - v[13] = blake2b_IV[5] ^ t[1]; - v[14] = blake2b_IV[6] ^ f0; - v[15] = blake2b_IV[7]; - - ROUND(m, v, 0); - ROUND(m, v, 1); - ROUND(m, v, 2); - ROUND(m, v, 3); - ROUND(m, v, 4); - ROUND(m, v, 5); - ROUND(m, v, 6); - ROUND(m, v, 7); - ROUND(m, v, 8); - ROUND(m, v, 9); - ROUND(m, v, 10); - ROUND(m, v, 11); - - h[0] ^= v[0] ^ v[8]; - h[1] ^= v[1] ^ v[9]; - h[2] ^= v[2] ^ v[10]; - h[3] ^= v[3] ^ v[11]; - h[4] ^= v[4] ^ v[12]; - h[5] ^= v[5] ^ v[13]; - h[6] ^= v[6] ^ v[14]; - h[7] ^= v[7] ^ v[15]; -} - -void Blake2b::incrementCounter(std::uint64_t inc) -{ - t[0] += inc; - t[1] += (t[0] < inc); -} - -void Blake2b::update(const void* in, std::size_t inLen) -{ - auto bin = static_cast(in); - - if (bufLen + inLen > BLOCK_BYTES) { - std::size_t have = bufLen; - std::size_t left = BLOCK_BYTES - have; - std::memcpy(buf + have, bin, left); - - incrementCounter(BLOCK_BYTES); - compress(buf, 0); - - bufLen = 0; - inLen -= left; - bin += left; - - while (inLen > BLOCK_BYTES) { - incrementCounter(BLOCK_BYTES); - compress(bin, 0); - inLen -= BLOCK_BYTES; - bin += BLOCK_BYTES; - } - } - std::memcpy(buf + bufLen, bin, inLen); - bufLen += inLen; -} - -void Blake2b::final(void* out, std::size_t outLen) -{ - std::uint8_t buffer[OUT_BYTES] = {0}; - - incrementCounter(bufLen); - std::memset(buf + bufLen, 0, BLOCK_BYTES - bufLen); - compress(buf, UINT64_C(0xFFFFFFFFFFFFFFFF)); - - for (unsigned int i = 0; i < 8; i++) { - store64(buffer + i * sizeof(std::uint64_t), h[i]); - } - - std::memcpy(out, buffer, outLen); -} - -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/blake2b.h b/src/crypto/argon2gpu/blake2b.h deleted file mode 100644 index 2c7ee32f16..0000000000 --- a/src/crypto/argon2gpu/blake2b.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_BLAKE2B_H -#define ARGON2_BLAKE2B_H - -#include - -#include - -namespace argon2gpu -{ - -class Blake2b -{ - public: - enum - { - BLOCK_BYTES = 128, - OUT_BYTES = 64, - }; - - private: - std::uint64_t h[8]; - std::uint64_t t[2]; - std::uint8_t buf[BLOCK_BYTES]; - size_t bufLen; - - void compress(const void *block, std::uint64_t f0); - void incrementCounter(std::uint64_t inc); - - public: - Blake2b() : h(), t(), buf(), bufLen(0) {} - - void init(size_t outlen); - void update(const void *in, size_t inLen); - void final(void *out, size_t outLen); -}; - -} // namespace argon2gpu - -#endif // ARGON2_BLAKE2B_H diff --git a/src/crypto/argon2gpu/common.cpp b/src/crypto/argon2gpu/common.cpp deleted file mode 100644 index eac6d45dee..0000000000 --- a/src/crypto/argon2gpu/common.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/blake2b.h" -#include "crypto/argon2gpu/common.h" - -#include -#include - -#include - -#ifdef DEBUG -#include -#endif - -namespace argon2gpu -{ -static void store32(void* dst, std::uint32_t v) -{ - auto out = static_cast(dst); - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); - v >>= 8; - *out++ = static_cast(v); -} - -Argon2Params::Argon2Params(std::size_t outLen, std::size_t t_cost, std::size_t m_cost, std::size_t lanes) - : outLen(outLen), t_cost(t_cost), m_cost(m_cost), lanes(lanes) -{ - // TODO validate inputs - std::size_t segments = lanes * ARGON2_SYNC_POINTS; - segmentBlocks = std::max(m_cost, 2 * segments) / segments; -} - -void Argon2Params::digestLong(void* out, std::size_t outLen, const void* in, std::size_t inLen) -{ - auto bout = static_cast(out); - std::uint8_t outlen_bytes[sizeof(std::uint32_t)]; - Blake2b blake; - - store32(outlen_bytes, static_cast(outLen)); - if (outLen <= Blake2b::OUT_BYTES) { - blake.init(outLen); - blake.update(outlen_bytes, sizeof(outlen_bytes)); - blake.update(in, inLen); - blake.final(out, outLen); - } else { - std::uint8_t out_buffer[Blake2b::OUT_BYTES]; - - blake.init(Blake2b::OUT_BYTES); - blake.update(outlen_bytes, sizeof(outlen_bytes)); - blake.update(in, inLen); - blake.final(out_buffer, Blake2b::OUT_BYTES); - - std::memcpy(bout, out_buffer, Blake2b::OUT_BYTES / 2); - bout += Blake2b::OUT_BYTES / 2; - - std::size_t toProduce = outLen - Blake2b::OUT_BYTES / 2; - while (toProduce > Blake2b::OUT_BYTES) { - blake.init(Blake2b::OUT_BYTES); - blake.update(out_buffer, Blake2b::OUT_BYTES); - blake.final(out_buffer, Blake2b::OUT_BYTES); - - std::memcpy(bout, out_buffer, Blake2b::OUT_BYTES / 2); - bout += Blake2b::OUT_BYTES / 2; - toProduce -= Blake2b::OUT_BYTES / 2; - } - - blake.init(toProduce); - blake.update(out_buffer, Blake2b::OUT_BYTES); - blake.final(bout, toProduce); - } -} - -void Argon2Params::initialHash( - void* out, - const void* input, - std::size_t inputLen, - Type type, - Version version) const -{ - Blake2b blake; - std::uint8_t value[sizeof(std::uint32_t)]; - - blake.init(ARGON2_PREHASH_DIGEST_LENGTH); - - store32(value, lanes); - blake.update(value, sizeof(value)); - store32(value, outLen); - blake.update(value, sizeof(value)); - store32(value, m_cost); - blake.update(value, sizeof(value)); - store32(value, t_cost); - blake.update(value, sizeof(value)); - store32(value, version); - blake.update(value, sizeof(value)); - store32(value, type); - blake.update(value, sizeof(value)); - store32(value, inputLen); - blake.update(value, sizeof(value)); - blake.update(input, inputLen); - store32(value, inputLen); - blake.update(value, sizeof(value)); // saltLen - blake.update(input, inputLen); // salt, saltLen - store32(value, 0); - blake.update(value, sizeof(value)); - blake.update(NULL, 0); - store32(value, 0); - blake.update(value, sizeof(value)); - blake.update(NULL, 0); - - blake.final(out, ARGON2_PREHASH_DIGEST_LENGTH); -} - -void Argon2Params::fillFirstBlocks( - void* memory, - const void* input, - std::size_t inputLen, - Type type, - Version version) const -{ - std::uint8_t initHash[ARGON2_PREHASH_SEED_LENGTH]; - initialHash(initHash, input, inputLen, type, version); - -#ifdef DEBUG - std::fprintf(stderr, "Initial hash: "); - for (std::size_t i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; i++) { - std::fprintf(stderr, "%02x", (unsigned int)initHash[i]); - } - std::fprintf(stderr, "\n"); -#endif - - auto bmemory = static_cast(memory); - - store32(initHash + ARGON2_PREHASH_DIGEST_LENGTH, 0); - for (std::uint32_t l = 0; l < lanes; l++) { - store32(initHash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); - digestLong(bmemory, ARGON2_BLOCK_SIZE, initHash, sizeof(initHash)); - -#ifdef DEBUG - std::fprintf(stderr, "Initial block 0 for lane %u: {\n", (unsigned)l); - for (std::size_t i = 0; i < ARGON2_BLOCK_SIZE / 8; i++) { - std::fprintf(stderr, " 0x"); - for (std::size_t k = 0; k < 8; k++) { - std::fprintf(stderr, "%02x", (unsigned)bmemory[i * 8 + 7 - k]); - } - std::fprintf(stderr, "UL,\n"); - } - std::fprintf(stderr, "}\n"); -#endif - - bmemory += ARGON2_BLOCK_SIZE; - } - - store32(initHash + ARGON2_PREHASH_DIGEST_LENGTH, 1); - for (std::uint32_t l = 0; l < lanes; l++) { - store32(initHash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); - digestLong(bmemory, ARGON2_BLOCK_SIZE, initHash, sizeof(initHash)); - -#ifdef DEBUG - std::fprintf(stderr, "Initial block 1 for lane %u: {\n", (unsigned)l); - for (std::size_t i = 0; i < ARGON2_BLOCK_SIZE / 8; i++) { - std::fprintf(stderr, " 0x"); - for (std::size_t k = 0; k < 8; k++) { - std::fprintf(stderr, "%02x", (unsigned)bmemory[i * 8 + 7 - k]); - } - std::fprintf(stderr, "UL,\n"); - } - std::fprintf(stderr, "}\n"); -#endif - - bmemory += ARGON2_BLOCK_SIZE; - } -} - -void Argon2Params::finalize(void* out, const void* memory) const -{ - /* TODO: nicify this (or move it into the kernel (I mean, we currently - * have all lanes in one work-group...) */ - struct block { - std::uint64_t v[ARGON2_BLOCK_SIZE / 8]; - }; - - auto cursor = static_cast(memory); -#ifdef DEBUG - for (std::size_t l = 0; l < getLanes(); l++) { - for (std::size_t k = 0; k < ARGON2_BLOCK_SIZE / 8; k++) { - std::fprintf(stderr, "Block %04u [%3u]: %016llx\n", - (unsigned)l, (unsigned)k, - (unsigned long long)cursor[l].v[k]); - } - } -#endif - - block xored = *cursor; - for (std::uint32_t l = 1; l < lanes; l++) { - ++cursor; - for (std::size_t i = 0; i < ARGON2_BLOCK_SIZE / 8; i++) { - xored.v[i] ^= cursor->v[i]; - } - } - - digestLong(out, outLen, &xored, ARGON2_BLOCK_SIZE); -} - -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/common.h b/src/crypto/argon2gpu/common.h deleted file mode 100644 index 8e37c6ebb8..0000000000 --- a/src/crypto/argon2gpu/common.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2COMMON_H -#define ARGON2COMMON_H - -#include - -#include - -#include "crypto/argon2d/argon2.h" - -namespace argon2gpu -{ - -enum -{ - ARGON2_BLOCK_SIZE = 1024, - ARGON2_PREHASH_DIGEST_LENGTH = 64, - ARGON2_PREHASH_SEED_LENGTH = 72 -}; - -enum Type -{ - ARGON2_D = 0, - ARGON2_I = 1, - ARGON2_ID = 2 -}; - -enum Version -{ - ARGON2_VERSION_10 = 0x10, - ARGON2_VERSION_13 = 0x13 -}; - -class Argon2Params -{ - private: - std::uint32_t outLen; - std::uint32_t t_cost, m_cost, lanes; - - std::uint32_t segmentBlocks; - - static void digestLong(void *out, size_t outLen, - const void *in, size_t inLen); - - void initialHash(void *out, const void *input, size_t inputLen, - Type type, Version version) const; - - public: - std::uint32_t getOutputLength() const { return outLen; } - - std::uint32_t getTimeCost() const { return t_cost; } - std::uint32_t getMemoryCost() const { return m_cost; } - std::uint32_t getLanes() const { return lanes; } - - std::uint32_t getSegmentBlocks() const { return segmentBlocks; } - std::uint32_t getLaneBlocks() const - { - return segmentBlocks * ARGON2_SYNC_POINTS; - } - std::uint32_t getMemoryBlocks() const { return getLaneBlocks() * lanes; } - size_t getMemorySize() const - { - return static_cast(getMemoryBlocks()) * ARGON2_BLOCK_SIZE; - } - - Argon2Params(size_t outLen, size_t t_cost, size_t m_cost, size_t lanes); - - void fillFirstBlocks(void *memory, const void *pwd, size_t pwdLen, - Type type, Version version) const; - - void finalize(void *out, const void *memory) const; -}; - -} // namespace argon2gpu - -#endif // ARGON2COMMON_H diff --git a/src/crypto/argon2gpu/cuda/blake2b-kernels.cu b/src/crypto/argon2gpu/cuda/blake2b-kernels.cu deleted file mode 100644 index fd3805d4f5..0000000000 --- a/src/crypto/argon2gpu/cuda/blake2b-kernels.cu +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright (C) 2018-2021 Ehsan Dalvand , Alireza Jahandideh - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/* For IDE: */ -#ifndef __CUDACC__ -#define __CUDACC__ -#endif - -#include "crypto/argon2gpu/cuda/kernels.h" -#include "crypto/argon2gpu/cuda/blake2b-kernels.h" - -#define INPUT_LEN 80 -__constant__ uint32_t d_data[20]; - -#define CAT(x, y) CAT_(x, y) -#define CAT_(x, y) x ## y - -#define G(a,b,c,d,x,col) { \ - ref1=sigma[r][col]>>16*x;\ - ref2=sigma[r][col]>>(16*x+8);\ - CAT(v,a) += CAT(v,b)+m[ref1]; \ - CAT(v,d) = rotate64(CAT(v,d) ^ CAT(v,a),32); \ - CAT(v,c) += CAT(v,d); \ - CAT(v,b) = rotate64(CAT(v,b) ^ CAT(v,c), 24); \ - CAT(v,a) +=CAT(v,b)+m[ref2]; \ - CAT(v,d) = rotate64( CAT(v,d) ^ CAT(v,a), 16); \ - CAT(v,c) += CAT(v,d); \ - CAT(v,b) = rotate64( CAT(v,b) ^ CAT(v,c), 63); \ -} - -__device__ __forceinline__ -void enc32(void *pp, const uint32_t x) { - uint8_t *p = (uint8_t *) pp; - - p[3] = x & 0xff; - p[2] = (x >> 8) & 0xff; - p[1] = (x >> 16) & 0xff; - p[0] = (x >> 24) & 0xff; -} - - -__device__ void load_block(uint32_t* dest, uint32_t* src, uint32_t idx) { - - uint32_t i, j; - - for (i = 0; i < 64; i++) { - j = idx + i * 4; - dest[j] = src[j]; - } - -} - -__device__ -void blake2b_compress_1w( - uint64x8* state, const uint64_t* m, - const uint32_t step, const bool lastChunk = false, - const size_t lastChunkSize = 0) -{ - - uint64_t v0, v1, v2, v3, v4, v5, v6, - v7, v8, v9, v10, v11, v12, - v13, v14, v15; - - v0 = state->s0; - v1 = state->s1; - v2 = state->s2; - v3 = state->s3; - v4 = state->s4; - v5 = state->s5; - v6 = state->s6; - v7 = state->s7; - v8 = blake2b_IV[0]; - v9 = blake2b_IV[1]; - v10 = blake2b_IV[2]; - v11 = blake2b_IV[3]; - - if (lastChunk) { - v12 = blake2b_IV[4] ^ (step - 1) * BLAKE_BLOCKBYTES + lastChunkSize; - v14 = blake2b_IV[6] ^ (uint64_t) -1; - - } else { - v12 = blake2b_IV[4] ^ step * BLAKE_BLOCKBYTES; - v14 = blake2b_IV[6]; - } - - v13 = blake2b_IV[5]; - v15 = blake2b_IV[7]; - -#pragma unroll 12 - for (int r = 0; r < 12; r++) { - uint8_t ref1, ref2; - - /* column step */ - G(0, 4, 8, 12, 0, 0); - G(1, 5, 9, 13, 1, 0); - G(2, 6, 10, 14, 2, 0); - G(3, 7, 11, 15, 3, 0); - - /* diagonal step */ - G(0, 5, 10, 15, 0, 1); - G(1, 6, 11, 12, 1, 1); - G(2, 7, 8, 13, 2, 1); - G(3, 4, 9, 14, 3, 1); - } - - state->s0 ^= v0 ^ v8; - state->s1 ^= v1 ^ v9; - state->s2 ^= v2 ^ v10; - state->s3 ^= v3 ^ v11; - state->s4 ^= v4 ^ v12; - state->s5 ^= v5 ^ v13; - state->s6 ^= v6 ^ v14; - state->s7 ^= v7 ^ v15; - -} - - -__device__ void blake2b_compress_4w( - struct partialState* state, uint64_t* m, - uint32_t step, uint32_t idx, - bool lastChunk = false, size_t lastChunkSize = 0) -{ - - uint64_t a, b, c, d; - - uint64_t counter = (idx == 0 ? step : 0); - - a = state->a; - b = state->b; - c = blake2b_IV[idx]; - - if (lastChunk) { - if (idx == 0) - d = blake2b_IV[4] ^ (step - 1) * BLAKE_BLOCKBYTES + lastChunkSize; - else if (idx == 2) - d = blake2b_IV[6] ^ (uint64_t) -1; - else - d = blake2b_IV[idx + 4]; - } else { - d = blake2b_IV[idx + 4] ^ counter * BLAKE_BLOCKBYTES; - } - - __syncthreads(); - - for (uint32_t r = 0; r < 12; ++r) { - - uint8_t ref1, ref2; - - ref1 = sigma[r][0] >> 8 * 2 * idx; - ref2 = sigma[r][0] >> 8 * (2 * idx + 1); - - g_shuffle(&a, &b, &c, &d, &m[ref1], &m[ref2]); - - b = __shfl_sync(0xffffffff, b, idx + 1, 4); - c = __shfl_sync(0xffffffff, c, idx + 2, 4); - d = __shfl_sync(0xffffffff, d, idx + 3, 4); - - ref1 = sigma[r][1] >> 8 * 2 * idx; - ref2 = sigma[r][1] >> 8 * (2 * idx + 1); - - g_shuffle(&a, &b, &c, &d, &m[ref1], &m[ref2]); - - b = __shfl_sync(0xffffffff, b, idx - 1, 4); - c = __shfl_sync(0xffffffff, c, idx - 2, 4); - d = __shfl_sync(0xffffffff, d, idx - 3, 4); - - } - - state->a = state->a ^ a ^ c; - state->b = state->b ^ b ^ d; - -} - - -__device__ void computeInitialHash( - const uint32_t* input, uint32_t* buffer, - uint32_t nonce) -{ - - uint64x8 state; - -#pragma unroll - for (int i = 0; i < 32; i++) - buffer[i] = 0; - - state.s0 = blake2b_Init[0]; - state.s1 = blake2b_Init[1]; - state.s2 = blake2b_Init[2]; - state.s3 = blake2b_Init[3]; - state.s4 = blake2b_Init[4]; - state.s5 = blake2b_Init[5]; - state.s6 = blake2b_Init[6]; - state.s7 = blake2b_Init[7]; - - buffer[0] = ALGO_LANES; - buffer[1] = ALGO_OUTLEN; - buffer[2] = ALGO_MCOST; - buffer[3] = ALGO_PASSES; - buffer[4] = ALGO_VERSION; - buffer[6] = 80; - -#pragma unroll - for (int i = 0; i < 19; i++) - buffer[7 + i] = input[i]; - - buffer[26] = nonce; - buffer[27] = 80; - -#pragma unroll - for (int i = 0; i < 4; i++) - buffer[28 + i] = input[i]; - - blake2b_compress_1w(&state, (uint64_t*) buffer, 1); - - -#pragma unroll - for (int i = 0; i < 15; i++) - buffer[i] = input[i + 4]; - - buffer[15] = nonce; - -#pragma unroll - for (int i = 16; i < 32; i++) - buffer[i] = 0; - - blake2b_compress_1w(&state, (uint64_t*) buffer, 2, true, 72); - -#pragma unroll - for (int i = 0; i < 32; i++) - buffer[i] = 0; - - - memcpy(&buffer[1], &state, 64); - -} - -__device__ void fillFirstBlock(struct block* memory, uint32_t* buffer) { - - uint32_t row = threadIdx.x / ALGO_LANES; - uint32_t column = threadIdx.x % ALGO_LANES; - - struct block* memCell = (memory + (blockIdx.x * blockDim.y + threadIdx.y) * ALGO_TOTAL_BLOCKS) - + row * ALGO_LANES + column; - - uint64_t* buffer_64 = (uint64_t*) buffer; - uint64x8 state; - - state.s0 = blake2b_Init[0]; - state.s1 = blake2b_Init[1]; - state.s2 = blake2b_Init[2]; - state.s3 = blake2b_Init[3]; - state.s4 = blake2b_Init[4]; - state.s5 = blake2b_Init[5]; - state.s6 = blake2b_Init[6]; - state.s7 = blake2b_Init[7]; - - buffer[0] = 1024; - buffer[17] = row; - buffer[18] = column; - - blake2b_compress_1w(&state, buffer_64, 1, true, 76); - - memCell->data[0] = state.s0; - memCell->data[1] = state.s1; - memCell->data[2] = state.s2; - memCell->data[3] = state.s3; - - for (int i = 0; i < 8; i++) { - buffer_64[i + 8] = 0; - } - - buffer_64[0] = state.s0; - buffer_64[1] = state.s1; - buffer_64[2] = state.s2; - buffer_64[3] = state.s3; - buffer_64[4] = state.s4; - buffer_64[5] = state.s5; - buffer_64[6] = state.s6; - buffer_64[7] = state.s7; - - for (uint8_t i = 1; i < 31; i++) { - - state.s0 = blake2b_Init[0]; - state.s1 = blake2b_Init[1]; - state.s2 = blake2b_Init[2]; - state.s3 = blake2b_Init[3]; - state.s4 = blake2b_Init[4]; - state.s5 = blake2b_Init[5]; - state.s6 = blake2b_Init[6]; - state.s7 = blake2b_Init[7]; - - blake2b_compress_1w(&state, buffer_64, 1, true, 64); - - buffer_64[0] = state.s0; - buffer_64[1] = state.s1; - buffer_64[2] = state.s2; - buffer_64[3] = state.s3; - buffer_64[4] = state.s4; - buffer_64[5] = state.s5; - buffer_64[6] = state.s6; - buffer_64[7] = state.s7; - - memCell->data[(i << 2) + 0] = state.s0; - memCell->data[(i << 2) + 1] = state.s1; - memCell->data[(i << 2) + 2] = state.s2; - memCell->data[(i << 2) + 3] = state.s3; - - } - - memCell->data[124] = state.s4; - memCell->data[125] = state.s5; - memCell->data[126] = state.s6; - memCell->data[127] = state.s7; - -} - -__global__ void argon2_initialize_kernel(struct block* memory, uint32_t startNonce) -{ - - uint32_t buffer[32]; - const uint32_t nonce = (blockIdx.x*blockDim.y+threadIdx.y) + startNonce; - - computeInitialHash(d_data, buffer, nonce); - fillFirstBlock(memory, buffer); - -} - -__global__ void argon2_finalize_kernel( - block* memory, uint32_t startNonce, - uint64_t target, uint32_t* resNonces) -{ - - extern __shared__ uint32_t input_t[]; - uint32_t* input = &(input_t[threadIdx.y*258]); - uint64_t* input_64=(uint64_t*)input; - - uint32_t idx = threadIdx.x; - uint32_t jobId = blockIdx.x * blockDim.y + threadIdx.y; - uint32_t nonce = jobId + startNonce; - - uint32_t* memLane = (uint32_t*) ((memory + jobId * ALGO_TOTAL_BLOCKS)); - partialState state; - - load_block(&input[1], memLane, idx); - - input[0] = 32; - - state.a = blake2b_Init_928[idx]; - state.b = blake2b_Init_928[idx + 4]; - - blake2b_compress_4w(&state, &input_64[0], 1, idx); - blake2b_compress_4w(&state, &input_64[16], 2, idx); - blake2b_compress_4w(&state, &input_64[32], 3, idx); - blake2b_compress_4w(&state, &input_64[48], 4, idx); - blake2b_compress_4w(&state, &input_64[64], 5, idx); - blake2b_compress_4w(&state, &input_64[80], 6, idx); - blake2b_compress_4w(&state, &input_64[96], 7, idx); - blake2b_compress_4w(&state, &input_64[112], 8, idx); - - zero_buffer(&input[0], idx); - input[0]=input[256]; - - blake2b_compress_4w(&state, &input_64[0], 9, idx, true, 4); - - input_64[idx] = state.a; - - __syncthreads(); - - - if (idx == 0 && input_64[3] <= target) { - resNonces[0] = nonce; - } - -} - -__host__ void set_data(const void* data) { - - cudaMemcpyToSymbol(d_data, data, INPUT_LEN); - -} diff --git a/src/crypto/argon2gpu/cuda/blake2b-kernels.h b/src/crypto/argon2gpu/cuda/blake2b-kernels.h deleted file mode 100644 index 0aca110328..0000000000 --- a/src/crypto/argon2gpu/cuda/blake2b-kernels.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2018-2021 Ehsan Dalvand , Alireza Jahandideh - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef BLAKE2B_KERNELS_H -#define BLAKE2B_KERNELS_H - - -enum algo_constants { - ARGON2_BLOCK_SIZE = 1024, - ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, - ARGON2_PREHASH_DIGEST_LENGTH = 64, - ARGON2_PREHASH_SEED_LENGTH = 72, - BLAKE_BLOCKBYTES = 128 -}; - -enum algo_params { - ALGO_LANES = 8, - ALGO_MCOST = 500, - ALGO_PASSES = 2, - ALGO_OUTLEN = 32, - ALGO_VERSION = 0x10, - ALGO_TOTAL_BLOCKS = (ALGO_MCOST / (4 * ALGO_LANES)) * 4 * ALGO_LANES, - ALGO_LANE_LENGHT = ALGO_TOTAL_BLOCKS / ALGO_LANES, - ALGO_SEGMENT_BLOCKS = ALGO_LANE_LENGHT / 4 -}; - -struct partialState { - uint64_t a, b; -}; - -struct block { - uint64_t data[ARGON2_QWORDS_IN_BLOCK]; -}; - -struct block_th { - uint64_t a, b, c, d; -}; - -struct uint64x8 { - uint64_t s0, s1, s2, s3, s4, s5, s6, s7; -}; - -__device__ __forceinline__ -void zero_buffer(uint32_t* buffer, const uint32_t idx) { - buffer[idx] = 0; - buffer[idx + 4] = 0; - buffer[idx + 8] = 0; - buffer[idx + 12] = 0; - buffer[idx + 16] = 0; - buffer[idx + 20] = 0; - buffer[idx + 24] = 0; - buffer[idx + 28] = 0; -} - -static __constant__ const uint64_t sigma[12][2] = { - - { 506097522914230528,1084818905618843912 }, - { 436021270388410894, 217587900856929281 }, - { 940973067642603531, 290764780619369994 }, - { 1011915791265892615, 580682894302053890 }, - { 1083683067090239497, 937601969488068878 }, - { 218436676723543042, 648815278989708548 }, - { 721716194318550284, 794887571959580416 }, - { 649363922558061325, 721145521830297605 }, - { 576464098234863366, 363107122416517644 }, - { 360576072368521738, 3672381957147407 }, - { 506097522914230528, 1084818905618843912 }, - { 436021270388410894, 217587900856929281 }, -}; - -static __constant__ const uint64_t blake2b_Init[8] = { - 0x6A09E667F2BDC948, - 0xBB67AE8584CAA73B, - 0x3C6EF372FE94F82B, - 0xA54FF53A5F1D36F1, - 0x510E527FADE682D1, - 0x9B05688C2B3E6C1F, - 0x1F83D9ABFB41BD6B, - 0x5BE0CD19137E2179 -}; - -static __constant__ const uint64_t blake2b_Init_928[8] = { - 0x6A09E667F2BDC928, - 0xBB67AE8584CAA73B, - 0x3C6EF372FE94F82B, - 0xA54FF53A5F1D36F1, - 0x510E527FADE682D1, - 0x9B05688C2B3E6C1F, - 0x1F83D9ABFB41BD6B, - 0x5BE0CD19137E2179 -}; - -static __constant__ const uint64_t blake2b_IV[8] = { - 7640891576956012808UL, - 13503953896175478587UL, - 4354685564936845355UL, - 11912009170470909681UL, - 5840696475078001361UL, - 11170449401992604703UL, - 2270897969802886507UL, - 6620516959819538809UL -}; - -__device__ __forceinline__ uint64_t rotate64(const uint64_t x, const uint32_t n) { - return (x >> n) | (x << (64 - n)); -} - -__device__ __forceinline__ -void g_shuffle( - uint64_t* a, uint64_t* b, - uint64_t* c, uint64_t* d, - const uint64_t* m1, const uint64_t* m2) -{ - - *a = *a + *b + *m1; - *d = rotate64(*d ^ *a, 32); - *c = *c + *d; - *b = rotate64(*b ^ *c, 24); - *a = *a + *b + *m2; - *d = rotate64(*d ^ *a, 16); - *c = *c + *d; - *b = rotate64(*b ^ *c, 63); - -} - - -__global__ void argon2_initialize_kernel(struct block* memory, uint32_t startNonce); - -__global__ void argon2_finalize_kernel( - block* memory, uint32_t startNonce, - uint64_t target, uint32_t* resNonces); - -__host__ void set_data(const void* data); - -#endif diff --git a/src/crypto/argon2gpu/cuda/cuda-exception.h b/src/crypto/argon2gpu/cuda/cuda-exception.h deleted file mode 100644 index dbd98e98fe..0000000000 --- a/src/crypto/argon2gpu/cuda/cuda-exception.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015, Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_CUDA_CUDAEXCEPTION_H -#define ARGON2_CUDA_CUDAEXCEPTION_H - -#include - -#include - -namespace argon2gpu -{ -namespace cuda -{ - -class CudaException : public std::exception -{ - private: - cudaError_t res; - - public: - CudaException(cudaError_t res) : res(res) {} - - const char *what() const noexcept override - { - return cudaGetErrorString(res); - } - - static void check(cudaError_t res) - { - if (res != cudaSuccess) - { - throw CudaException(res); - } - } -}; - -} // namespace cuda -} // namespace argon2gpu - -#endif // ARGON2_CUDA_CUDAEXCEPTION_H diff --git a/src/crypto/argon2gpu/cuda/device.cpp b/src/crypto/argon2gpu/cuda/device.cpp deleted file mode 100644 index d6028e3e87..0000000000 --- a/src/crypto/argon2gpu/cuda/device.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include "crypto/argon2gpu/cuda/cuda-exception.h" -#include "crypto/argon2gpu/cuda/device.h" - -namespace argon2gpu -{ -namespace cuda -{ -std::string Device::getName() const -{ - cudaDeviceProp prop; - CudaException::check(cudaGetDeviceProperties(&prop, deviceIndex)); - return "CUDA Device '" + std::string(prop.name) + "'"; -} - -std::string Device::getInfo() const -{ - /* FIXME: show some more stuff here: */ - cudaDeviceProp prop; - CudaException::check(cudaGetDeviceProperties(&prop, deviceIndex)); - return "CUDA Device '" + std::string(prop.name) + "'"; -} - -std::size_t Device::getTotalMemory() const -{ - cudaDeviceProp prop; - CudaException::check(cudaGetDeviceProperties(&prop, deviceIndex)); - return prop.totalGlobalMem; -} - -} // namespace cuda -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/cuda/device.h b/src/crypto/argon2gpu/cuda/device.h deleted file mode 100644 index 70bd666159..0000000000 --- a/src/crypto/argon2gpu/cuda/device.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_CUDA_DEVICE_H -#define ARGON2_CUDA_DEVICE_H - -#include - -namespace argon2gpu -{ -namespace cuda -{ - -class Device -{ - private: - int deviceIndex; - - public: - std::string getName() const; - std::string getInfo() const; - std::size_t getTotalMemory() const; - - int getDeviceIndex() const { return deviceIndex; } - - /** - * @brief Empty constructor. - * NOTE: Calling methods other than the destructor on an instance initialized - * with empty constructor results in undefined behavior. - */ - Device() {} - - Device(int deviceIndex) : deviceIndex(deviceIndex) - { - } - - Device(const Device &) = default; - Device(Device &&) = default; - - Device &operator=(const Device &) = default; -}; - -} // namespace cuda -} // namespace argon2gpu - -#endif // ARGON2_CUDA_DEVICE_H diff --git a/src/crypto/argon2gpu/cuda/global-context.cpp b/src/crypto/argon2gpu/cuda/global-context.cpp deleted file mode 100644 index 40091070e1..0000000000 --- a/src/crypto/argon2gpu/cuda/global-context.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/cuda/cuda-exception.h" -#include "crypto/argon2gpu/cuda/global-context.h" - -namespace argon2gpu -{ -namespace cuda -{ -GlobalContext::GlobalContext() - : devices() -{ - int count; - CudaException::check(cudaGetDeviceCount(&count)); - - devices.reserve(count); - for (int i = 0; i < count; i++) { - devices.emplace_back(i); - } -} - -} // namespace cuda -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/cuda/global-context.h b/src/crypto/argon2gpu/cuda/global-context.h deleted file mode 100644 index d672084dbe..0000000000 --- a/src/crypto/argon2gpu/cuda/global-context.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015, Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_CUDA_GLOBALCONTEXT_H -#define ARGON2_CUDA_GLOBALCONTEXT_H - -#include "crypto/argon2gpu/cuda/device.h" - -#include -#include - -namespace argon2gpu -{ -namespace cuda -{ - -class GlobalContext -{ - private: - std::vector devices; - - public: - const std::vector &getAllDevices() const { return devices; } - - GlobalContext(); -}; - -} // namespace cuda -} // namespace argon2gpu - -#endif // ARGON2_CUDA_GLOBALCONTEXT_H diff --git a/src/crypto/argon2gpu/cuda/kernels.cu b/src/crypto/argon2gpu/cuda/kernels.cu deleted file mode 100644 index 83aee037d5..0000000000 --- a/src/crypto/argon2gpu/cuda/kernels.cu +++ /dev/null @@ -1,942 +0,0 @@ -/* - * Copyright (C) 2018-2021 Ehsan Dalvand - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/* For IDE: */ -#ifndef __CUDACC__ -#define __CUDACC__ -#endif - -#include "crypto/argon2gpu/cuda/cuda-exception.h" -#include "crypto/argon2gpu/cuda/kernels.h" -#include "crypto/argon2gpu/cuda/blake2b-kernels.h" - -#include - -#include -#ifndef NDEBUG -#include -#endif - -#define ARGON2_D 0 -#define ARGON2_I 1 -#define ARGON2_ID 2 - -#define ARGON2_VERSION_10 0x10 -#define ARGON2_VERSION_13 0x13 - -#define ARGON2_BLOCK_SIZE 1024 -#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8) -#define ARGON2_SYNC_POINTS 4 - -#define THREADS_PER_LANE 32 -#define QWORDS_PER_THREAD (ARGON2_QWORDS_IN_BLOCK / 32) - -namespace argon2gpu -{ -namespace cuda -{ -using namespace std; - -__device__ uint64_t u64_build(uint32_t hi, uint32_t lo) -{ - return ((uint64_t)hi << 32) | (uint64_t)lo; -} - -__device__ uint32_t u64_lo(uint64_t x) -{ - return (uint32_t)x; -} - -__device__ uint32_t u64_hi(uint64_t x) -{ - return (uint32_t)(x >> 32); -} - -__device__ uint64_t u64_shuffle(uint64_t v, uint32_t thread) -{ - uint32_t lo = u64_lo(v); - uint32_t hi = u64_hi(v); - lo = __shfl(lo, thread); - hi = __shfl(hi, thread); - return u64_build(hi, lo); -} - -struct block_g { - uint64_t data[ARGON2_QWORDS_IN_BLOCK]; -}; - -struct block_th { - uint64_t a, b, c, d; -}; - -__device__ uint64_t cmpeq_mask(uint32_t test, uint32_t ref) -{ - uint32_t x = -(uint32_t)(test == ref); - return u64_build(x, x); -} - -__device__ uint64_t block_th_get(const struct block_th* b, uint32_t idx) -{ - uint64_t res = 0; - res ^= cmpeq_mask(idx, 0) & b->a; - res ^= cmpeq_mask(idx, 1) & b->b; - res ^= cmpeq_mask(idx, 2) & b->c; - res ^= cmpeq_mask(idx, 3) & b->d; - return res; -} - -__device__ void block_th_set(struct block_th* b, uint32_t idx, uint64_t v) -{ - b->a ^= cmpeq_mask(idx, 0) & (v ^ b->a); - b->b ^= cmpeq_mask(idx, 1) & (v ^ b->b); - b->c ^= cmpeq_mask(idx, 2) & (v ^ b->c); - b->d ^= cmpeq_mask(idx, 3) & (v ^ b->d); -} - -__device__ void move_block(struct block_th* dst, const struct block_th* src) -{ - *dst = *src; -} - -__device__ void xor_block(struct block_th* dst, const struct block_th* src) -{ - dst->a ^= src->a; - dst->b ^= src->b; - dst->c ^= src->c; - dst->d ^= src->d; -} - -__device__ void load_block(struct block_th* dst, const struct block_g* src, uint32_t thread) -{ - dst->a = src->data[0 * THREADS_PER_LANE + thread]; - dst->b = src->data[1 * THREADS_PER_LANE + thread]; - dst->c = src->data[2 * THREADS_PER_LANE + thread]; - dst->d = src->data[3 * THREADS_PER_LANE + thread]; -} - -__device__ void load_block_xor(struct block_th* dst, const struct block_g* src, uint32_t thread) -{ - dst->a ^= src->data[0 * THREADS_PER_LANE + thread]; - dst->b ^= src->data[1 * THREADS_PER_LANE + thread]; - dst->c ^= src->data[2 * THREADS_PER_LANE + thread]; - dst->d ^= src->data[3 * THREADS_PER_LANE + thread]; -} - -__device__ void store_block(struct block_g* dst, const struct block_th* src, uint32_t thread) -{ - dst->data[0 * THREADS_PER_LANE + thread] = src->a; - dst->data[1 * THREADS_PER_LANE + thread] = src->b; - dst->data[2 * THREADS_PER_LANE + thread] = src->c; - dst->data[3 * THREADS_PER_LANE + thread] = src->d; -} - -__device__ uint64_t rotr64(uint64_t x, uint32_t n) -{ - return (x >> n) | (x << (64 - n)); -} - -__device__ uint64_t f(uint64_t x, uint64_t y) -{ - uint32_t xlo = u64_lo(x); - uint32_t ylo = u64_lo(y); - return x + y + 2 * u64_build(__umulhi(xlo, ylo), xlo * ylo); -} - -__device__ void g(struct block_th* block) -{ - uint64_t a, b, c, d; - a = block->a; - b = block->b; - c = block->c; - d = block->d; - - a = f(a, b); - d = rotr64(d ^ a, 32); - c = f(c, d); - b = rotr64(b ^ c, 24); - a = f(a, b); - d = rotr64(d ^ a, 16); - c = f(c, d); - b = rotr64(b ^ c, 63); - - block->a = a; - block->b = b; - block->c = c; - block->d = d; -} - -template -__device__ void apply_shuffle(struct block_th* block, uint32_t thread) -{ - for (uint32_t i = 0; i < QWORDS_PER_THREAD; i++) { - uint32_t src_thr = shuffle::apply(thread, i); - - uint64_t v = block_th_get(block, i); - v = u64_shuffle(v, src_thr); - block_th_set(block, i, v); - } -} - -__device__ void transpose(struct block_th* block, uint32_t thread) -{ - uint32_t thread_group = (thread & 0x0C) >> 2; - for (uint32_t i = 1; i < QWORDS_PER_THREAD; i++) { - uint32_t thr = (i << 2) ^ thread; - uint32_t idx = thread_group ^ i; - - uint64_t v = block_th_get(block, idx); - v = u64_shuffle(v, thr); - block_th_set(block, idx, v); - } -} - -struct identity_shuffle { - __device__ static uint32_t apply(uint32_t thread, uint32_t idx) - { - return thread; - } -}; - -struct shift1_shuffle { - __device__ static uint32_t apply(uint32_t thread, uint32_t idx) - { - return (thread & 0x1c) | ((thread + idx) & 0x3); - } -}; - -struct unshift1_shuffle { - __device__ static uint32_t apply(uint32_t thread, uint32_t idx) - { - idx = (QWORDS_PER_THREAD - idx) % QWORDS_PER_THREAD; - - return (thread & 0x1c) | ((thread + idx) & 0x3); - } -}; - -struct shift2_shuffle { - __device__ static uint32_t apply(uint32_t thread, uint32_t idx) - { - uint32_t lo = (thread & 0x1) | ((thread & 0x10) >> 3); - lo = (lo + idx) & 0x3; - return ((lo & 0x2) << 3) | (thread & 0xe) | (lo & 0x1); - } -}; - -struct unshift2_shuffle { - __device__ static uint32_t apply(uint32_t thread, uint32_t idx) - { - idx = (QWORDS_PER_THREAD - idx) % QWORDS_PER_THREAD; - - uint32_t lo = (thread & 0x1) | ((thread & 0x10) >> 3); - lo = (lo + idx) & 0x3; - return ((lo & 0x2) << 3) | (thread & 0xe) | (lo & 0x1); - } -}; - -__device__ void shuffle_block(struct block_th* block, uint32_t thread) -{ - transpose(block, thread); - - g(block); - - apply_shuffle(block, thread); - - g(block); - - apply_shuffle(block, thread); - transpose(block, thread); - - g(block); - - apply_shuffle(block, thread); - - g(block); - - apply_shuffle(block, thread); -} - -__device__ void next_addresses(struct block_th* addr, struct block_th* tmp, uint32_t thread_input, uint32_t thread) -{ - addr->a = u64_build(0, thread_input); - addr->b = 0; - addr->c = 0; - addr->d = 0; - - shuffle_block(addr, thread); - - addr->a ^= u64_build(0, thread_input); - move_block(tmp, addr); - - shuffle_block(addr, thread); - - xor_block(addr, tmp); -} - -__device__ void compute_ref_pos( - uint32_t lanes, - uint32_t segment_blocks, - uint32_t pass, - uint32_t lane, - uint32_t slice, - uint32_t offset, - uint32_t* ref_lane, - uint32_t* ref_index) -{ - uint32_t lane_blocks = ARGON2_SYNC_POINTS * segment_blocks; - - *ref_lane = *ref_lane % lanes; - - uint32_t base; - if (pass != 0) { - base = lane_blocks - segment_blocks; - } else { - if (slice == 0) { - *ref_lane = lane; - } - base = slice * segment_blocks; - } - - uint32_t ref_area_size = base + offset - 1; - if (*ref_lane != lane) { - ref_area_size = min(ref_area_size, base); - } - - *ref_index = __umulhi(*ref_index, *ref_index); - *ref_index = ref_area_size - 1 - __umulhi(ref_area_size, *ref_index); - - if (pass != 0 && slice != ARGON2_SYNC_POINTS - 1) { - *ref_index += (slice + 1) * segment_blocks; - if (*ref_index >= lane_blocks) { - *ref_index -= lane_blocks; - } - } -} - -struct ref { - uint32_t ref_lane; - uint32_t ref_index; -}; - -/* - * Refs hierarchy: - * lanes -> passes -> slices -> blocks - */ -template -__global__ void argon2_precompute_kernel( - struct ref* refs, - uint32_t passes, - uint32_t lanes, - uint32_t segment_blocks) -{ - uint32_t block_id = blockIdx.y * blockDim.y + threadIdx.y; - uint32_t thread = threadIdx.x; - - uint32_t segment_addr_blocks = (segment_blocks + ARGON2_QWORDS_IN_BLOCK - 1) / ARGON2_QWORDS_IN_BLOCK; - uint32_t block = block_id % segment_addr_blocks; - uint32_t segment = block_id / segment_addr_blocks; - - uint32_t slice, pass, pass_id, lane; - if (type == ARGON2_ID) { - slice = segment % (ARGON2_SYNC_POINTS / 2); - lane = segment / (ARGON2_SYNC_POINTS / 2); - pass_id = pass = 0; - } else { - slice = segment % ARGON2_SYNC_POINTS; - pass_id = segment / ARGON2_SYNC_POINTS; - - pass = pass_id % passes; - lane = pass_id / passes; - } - - struct block_th addr, tmp; - - uint32_t thread_input; - switch (thread) { - case 0: - thread_input = pass; - break; - case 1: - thread_input = lane; - break; - case 2: - thread_input = slice; - break; - case 3: - thread_input = lanes * segment_blocks * ARGON2_SYNC_POINTS; - break; - case 4: - thread_input = passes; - break; - case 5: - thread_input = type; - break; - case 6: - thread_input = block + 1; - break; - default: - thread_input = 0; - break; - } - - next_addresses(&addr, &tmp, thread_input, thread); - - refs += segment * segment_blocks; - - for (uint32_t i = 0; i < QWORDS_PER_THREAD; i++) { - uint32_t pos = i * THREADS_PER_LANE + thread; - uint32_t offset = block * ARGON2_QWORDS_IN_BLOCK + pos; - if (offset < segment_blocks) { - uint64_t v = block_th_get(&addr, i); - uint32_t ref_index = u64_lo(v); - uint32_t ref_lane = u64_hi(v); - - compute_ref_pos(lanes, segment_blocks, pass, lane, slice, offset, - &ref_lane, &ref_index); - - refs[offset].ref_index = ref_index; - refs[offset].ref_lane = ref_lane; - } - } -} - -template -__device__ void argon2_core( - struct block_g* memory, - struct block_g* mem_curr, - struct block_th* prev, - struct block_th* tmp, - uint32_t lanes, - uint32_t thread, - uint32_t pass, - uint32_t ref_index, - uint32_t ref_lane) -{ - struct block_g* mem_ref = memory + ref_index * lanes + ref_lane; - - if (version != ARGON2_VERSION_10 && pass != 0) { - load_block(tmp, mem_curr, thread); - load_block_xor(prev, mem_ref, thread); - xor_block(tmp, prev); - } else { - load_block_xor(prev, mem_ref, thread); - move_block(tmp, prev); - } - - shuffle_block(prev, thread); - - xor_block(prev, tmp); - - store_block(mem_curr, prev, thread); -} - -template -__device__ void argon2_step_precompute( - struct block_g* memory, - struct block_g* mem_curr, - struct block_th* prev, - struct block_th* tmp, - const struct ref** refs, - uint32_t lanes, - uint32_t segment_blocks, - uint32_t thread, - uint32_t lane, - uint32_t pass, - uint32_t slice, - uint32_t offset) -{ - uint32_t ref_index, ref_lane; - if (type == ARGON2_I || (type == ARGON2_ID && pass == 0 && - slice < ARGON2_SYNC_POINTS / 2)) { - ref_index = (*refs)->ref_index; - ref_lane = (*refs)->ref_lane; - (*refs)++; - } else { - uint64_t v = u64_shuffle(prev->a, 0); - ref_index = u64_lo(v); - ref_lane = u64_hi(v); - - compute_ref_pos(lanes, segment_blocks, pass, lane, slice, offset, - &ref_lane, &ref_index); - } - - argon2_core(memory, mem_curr, prev, tmp, lanes, thread, pass, - ref_index, ref_lane); -} - -template -__global__ void argon2_kernel_segment_precompute( - struct block_g* memory, - const struct ref* refs, - uint32_t passes, - uint32_t lanes, - uint32_t segment_blocks, - uint32_t pass, - uint32_t slice) -{ - uint32_t job_id = blockIdx.z * blockDim.z + threadIdx.z; - uint32_t lane = blockIdx.y * blockDim.y + threadIdx.y; - uint32_t thread = threadIdx.x; - - uint32_t lane_blocks = ARGON2_SYNC_POINTS * segment_blocks; - - /* select job's memory region: */ - memory += (size_t)job_id * lanes * lane_blocks; - - struct block_th prev, tmp; - - struct block_g* mem_segment = - memory + slice * segment_blocks * lanes + lane; - struct block_g *mem_prev, *mem_curr; - uint32_t start_offset = 0; - if (pass == 0) { - if (slice == 0) { - mem_prev = mem_segment + 1 * lanes; - mem_curr = mem_segment + 2 * lanes; - start_offset = 2; - } else { - mem_prev = mem_segment - lanes; - mem_curr = mem_segment; - } - } else { - mem_prev = mem_segment + (slice == 0 ? lane_blocks * lanes : 0) - lanes; - mem_curr = mem_segment; - } - - load_block(&prev, mem_prev, thread); - - if (type == ARGON2_ID) { - if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) { - refs += lane * (lane_blocks / 2) + slice * segment_blocks; - refs += start_offset; - } - } else { - refs += (lane * passes + pass) * lane_blocks + slice * segment_blocks; - refs += start_offset; - } - - for (uint32_t offset = start_offset; offset < segment_blocks; ++offset) { - argon2_step_precompute( - memory, mem_curr, &prev, &tmp, &refs, lanes, segment_blocks, - thread, lane, pass, slice, offset); - - mem_curr += lanes; - } -} - -template -__global__ void argon2_kernel_oneshot_precompute( - struct block_g* memory, - const struct ref* refs, - uint32_t passes, - uint32_t lanes, - uint32_t segment_blocks) -{ - uint32_t job_id = blockIdx.z * blockDim.z + threadIdx.z; - uint32_t lane = threadIdx.y; - uint32_t thread = threadIdx.x; - - uint32_t lane_blocks = ARGON2_SYNC_POINTS * segment_blocks; - - /* select job's memory region: */ - memory += (size_t)job_id * lanes * lane_blocks; - - struct block_th prev, tmp; - - struct block_g* mem_lane = memory + lane; - struct block_g* mem_prev = mem_lane + 1 * lanes; - struct block_g* mem_curr = mem_lane + 2 * lanes; - - load_block(&prev, mem_prev, thread); - - if (type == ARGON2_ID) { - refs += lane * (lane_blocks / 2) + 2; - } else { - refs += lane * passes * lane_blocks + 2; - } - - uint32_t skip = 2; - for (uint32_t pass = 0; pass < passes; ++pass) { - for (uint32_t slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) { - for (uint32_t offset = 0; offset < segment_blocks; ++offset) { - if (skip > 0) { - --skip; - continue; - } - - argon2_step_precompute( - memory, mem_curr, &prev, &tmp, &refs, lanes, - segment_blocks, thread, lane, pass, slice, offset); - - mem_curr += lanes; - } - - __syncthreads(); - } - - mem_curr = mem_lane; - } -} - -template -__device__ void argon2_step( - struct block_g* memory, - struct block_g* mem_curr, - struct block_th* prev, - struct block_th* tmp, - struct block_th* addr, - uint32_t lanes, - uint32_t segment_blocks, - uint32_t thread, - uint32_t* thread_input, - uint32_t lane, - uint32_t pass, - uint32_t slice, - uint32_t offset) -{ - uint32_t ref_index, ref_lane; - - if (type == ARGON2_I || (type == ARGON2_ID && pass == 0 && - slice < ARGON2_SYNC_POINTS / 2)) { - uint32_t addr_index = offset % ARGON2_QWORDS_IN_BLOCK; - if (addr_index == 0) { - if (thread == 6) { - ++*thread_input; - } - next_addresses(addr, tmp, *thread_input, thread); - } - - uint32_t thr = addr_index % THREADS_PER_LANE; - uint32_t idx = addr_index / THREADS_PER_LANE; - - uint64_t v = block_th_get(addr, idx); - v = u64_shuffle(v, thr); - ref_index = u64_lo(v); - ref_lane = u64_hi(v); - } else { - uint64_t v = u64_shuffle(prev->a, 0); - ref_index = u64_lo(v); - ref_lane = u64_hi(v); - } - - compute_ref_pos(lanes, segment_blocks, pass, lane, slice, offset, - &ref_lane, &ref_index); - - argon2_core(memory, mem_curr, prev, tmp, lanes, thread, pass, - ref_index, ref_lane); -} - -template -__global__ void argon2_kernel_segment( - struct block_g* memory, - uint32_t passes, - uint32_t lanes, - uint32_t segment_blocks, - uint32_t pass, - uint32_t slice) -{ - uint32_t job_id = blockIdx.z * blockDim.z + threadIdx.z; - uint32_t lane = blockIdx.y * blockDim.y + threadIdx.y; - uint32_t thread = threadIdx.x; - - uint32_t lane_blocks = ARGON2_SYNC_POINTS * segment_blocks; - - /* select job's memory region: */ - memory += (size_t)job_id * lanes * lane_blocks; - - struct block_th prev, addr, tmp; - uint32_t thread_input; - - if (type == ARGON2_I || type == ARGON2_ID) { - switch (thread) { - case 0: - thread_input = pass; - break; - case 1: - thread_input = lane; - break; - case 2: - thread_input = slice; - break; - case 3: - thread_input = lanes * lane_blocks; - break; - case 4: - thread_input = passes; - break; - case 5: - thread_input = type; - break; - default: - thread_input = 0; - break; - } - - if (pass == 0 && slice == 0 && segment_blocks > 2) { - if (thread == 6) { - ++thread_input; - } - next_addresses(&addr, &tmp, thread_input, thread); - } - } - - struct block_g* mem_segment = - memory + slice * segment_blocks * lanes + lane; - struct block_g *mem_prev, *mem_curr; - uint32_t start_offset = 0; - if (pass == 0) { - if (slice == 0) { - mem_prev = mem_segment + 1 * lanes; - mem_curr = mem_segment + 2 * lanes; - start_offset = 2; - } else { - mem_prev = mem_segment - lanes; - mem_curr = mem_segment; - } - } else { - mem_prev = mem_segment + (slice == 0 ? lane_blocks * lanes : 0) - lanes; - mem_curr = mem_segment; - } - - load_block(&prev, mem_prev, thread); - - for (uint32_t offset = start_offset; offset < segment_blocks; ++offset) { - argon2_step( - memory, mem_curr, &prev, &tmp, &addr, lanes, segment_blocks, - thread, &thread_input, lane, pass, slice, offset); - - mem_curr += lanes; - } -} - -template -__global__ void argon2_kernel_oneshot( - struct block_g* memory, - uint32_t passes, - uint32_t lanes, - uint32_t segment_blocks) -{ - uint32_t job_id = blockIdx.z * blockDim.z + threadIdx.z; - uint32_t lane = threadIdx.y; - uint32_t thread = threadIdx.x; - - uint32_t lane_blocks = ARGON2_SYNC_POINTS * segment_blocks; - - /* select job's memory region: */ - memory += (size_t)job_id * lanes * lane_blocks; - - struct block_th prev, addr, tmp; - uint32_t thread_input; - - if (type == ARGON2_I || type == ARGON2_ID) { - switch (thread) { - case 1: - thread_input = lane; - break; - case 3: - thread_input = lanes * lane_blocks; - break; - case 4: - thread_input = passes; - break; - case 5: - thread_input = type; - break; - default: - thread_input = 0; - break; - } - - if (segment_blocks > 2) { - if (thread == 6) { - ++thread_input; - } - next_addresses(&addr, &tmp, thread_input, thread); - } - } - - struct block_g* mem_lane = memory + lane; - struct block_g* mem_prev = mem_lane + 1 * lanes; - struct block_g* mem_curr = mem_lane + 2 * lanes; - - load_block(&prev, mem_prev, thread); - - uint32_t skip = 2; - for (uint32_t pass = 0; pass < passes; ++pass) { - for (uint32_t slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) { - for (uint32_t offset = 0; offset < segment_blocks; ++offset) { - if (skip > 0) { - --skip; - continue; - } - - argon2_step( - memory, mem_curr, &prev, &tmp, &addr, lanes, - segment_blocks, thread, &thread_input, lane, pass, - slice, offset); - - mem_curr += lanes; - } - - __syncthreads(); - - if (type == ARGON2_I || type == ARGON2_ID) { - if (thread == 2) { - ++thread_input; - } - if (thread == 6) { - thread_input = 0; - } - } - } - if (type == ARGON2_I) { - if (thread == 0) { - ++thread_input; - } - if (thread == 2) { - thread_input = 0; - } - } - mem_curr = mem_lane; - } - - // xor last column and store the result for the finalize step - __syncthreads(); - thread = threadIdx.x + threadIdx.y * THREADS_PER_LANE; - uint32_t* mem_last_col = (uint32_t*)(memory + lanes * ( lane_blocks - 1 )); - uint32_t buf = 0; - for (uint32_t i=0; i(lanes) * segmentBlocks * ARGON2_SYNC_POINTS * ARGON2_BLOCK_SIZE * batchSize; - - CudaException::check(cudaMalloc(&memory, memorySize)); - CudaException::check(cudaMalloc((void**) &d_res_nonce, sizeof(uint32_t))); - -} - -KernelRunner::~KernelRunner() -{ - if (start != nullptr) { - cudaEventDestroy(start); - } - if (end != nullptr) { - cudaEventDestroy(end); - } - if (stream != nullptr) { - cudaStreamDestroy(stream); - } - if (memory != nullptr) { - cudaFree(memory); - } - if (refs != nullptr) { - cudaFree(refs); - } - if (d_res_nonce != nullptr) { - cudaFree(d_res_nonce); - } - cudaDeviceReset(); - -} - -void KernelRunner::runKernelOneshot(uint32_t lanesPerBlock, - uint32_t jobsPerBlock) -{ - - struct block_g* memory_blocks = (struct block_g*)memory; - dim3 blocks = dim3(1, 1, batchSize / jobsPerBlock); - dim3 threads = dim3(THREADS_PER_LANE, lanes, jobsPerBlock); - - if (version == ARGON2_VERSION_10) { - argon2_kernel_oneshot - <<>>( - memory_blocks, passes, lanes, segmentBlocks); - } else { - argon2_kernel_oneshot - <<>>( - memory_blocks, passes, lanes, segmentBlocks); - } - -} - - -void KernelRunner::init(const void* input){ - setCudaDevice(deviceIndex); - CudaException::check(cudaMemset(d_res_nonce, std::numeric_limits::max(), sizeof(uint32_t))); - set_data(input); -} - -void KernelRunner::fillFirstBlocks(uint32_t startNonce) -{ - uint32_t jobsPerBlock = (batchSize<16) ? 1 : 16; - dim3 blocks = dim3(batchSize / jobsPerBlock, 1, 1); - dim3 threads = dim3(lanes*2, jobsPerBlock, 1); - - argon2_initialize_kernel<<>>((struct block*)memory, startNonce); - -} - -void KernelRunner::finalize(const uint32_t startNonce, const uint64_t target) -{ - uint32_t jobsPerBlock = (batchSize<16) ? 1 : 16; - dim3 blocks = dim3(batchSize / jobsPerBlock, 1, 1); - dim3 threads = dim3(4, jobsPerBlock, 1); - argon2_finalize_kernel<<>>((struct block*)memory, startNonce, target, d_res_nonce); - - CudaException::check(cudaDeviceSynchronize()); - -} - - -uint32_t KernelRunner::readResultNonce() -{ - CudaException::check(cudaMemcpy(&res_nonce, d_res_nonce, sizeof(uint32_t), cudaMemcpyDeviceToHost)); - return res_nonce; -} - -void KernelRunner::run(uint32_t lanesPerBlock, uint32_t jobsPerBlock) -{ - setCudaDevice(deviceIndex); - runKernelOneshot(lanesPerBlock, jobsPerBlock); -} - - -} // namespace cuda -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/cuda/kernels.h b/src/crypto/argon2gpu/cuda/kernels.h deleted file mode 100644 index e808345826..0000000000 --- a/src/crypto/argon2gpu/cuda/kernels.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2018-2021 Ehsan Dalvand - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_CUDA_KERNELS_H -#define ARGON2_CUDA_KERNELS_H - -#include -#include - -/* workaround weird CMake/CUDA bug: */ -#ifdef argon2 -#undef argon2 -#endif - -namespace argon2gpu -{ -namespace cuda -{ - -class KernelRunner -{ - private: - std::uint32_t type, version; - std::uint32_t passes, lanes, segmentBlocks; - std::uint32_t batchSize; - bool bySegment; - bool precompute; - int deviceIndex; - - cudaEvent_t start, end; - cudaStream_t stream; - void *memory; - void *refs; - - std::uint32_t res_nonce; - std::uint32_t *d_res_nonce; - - void runKernelOneshot(std::uint32_t lanesPerBlock, - std::uint32_t jobsPerBlock); - - public: - std::uint32_t getMinLanesPerBlock() const { return bySegment ? 1 : lanes; } - std::uint32_t getMaxLanesPerBlock() const { return lanes; } - - std::uint32_t getMinJobsPerBlock() const { return 1; } - std::uint32_t getMaxJobsPerBlock() const { return batchSize; } - - std::uint32_t getBatchSize() const { return batchSize; } - - KernelRunner(std::uint32_t type, std::uint32_t version, - std::uint32_t passes, std::uint32_t lanes, - std::uint32_t segmentBlocks, std::uint32_t batchSize, - bool bySegment, bool precompute, int deviceIndex); - ~KernelRunner(); - - void run(std::uint32_t lanesPerBlock, std::uint32_t jobsPerBlock); - void init(const void* input); - void fillFirstBlocks(const std::uint32_t start_nonce); - void finalize(const std::uint32_t startNonce, const std::uint64_t target); - std::uint32_t readResultNonce(); - -}; - -} // namespace cuda -} // namespace argon2gpu - -#endif // ARGON2_CUDA_KERNELS_H diff --git a/src/crypto/argon2gpu/cuda/processing-unit.cpp b/src/crypto/argon2gpu/cuda/processing-unit.cpp deleted file mode 100644 index 340dd6fb6c..0000000000 --- a/src/crypto/argon2gpu/cuda/processing-unit.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2018-2021 Ehsan Dalvand - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/cuda/cuda-exception.h" -#include "crypto/argon2gpu/cuda/processing-unit.h" -#include - - -namespace argon2gpu -{ -namespace cuda -{ - -ProcessingUnit::ProcessingUnit( - const ProgramContext* programContext, - const Argon2Params* params, - const Device* device, - std::size_t batchSize, - bool bySegment, - bool precomputeRefs) - : programContext(programContext), params(params), device(device), - runner(programContext->getArgon2Type(), - programContext->getArgon2Version(), - params->getTimeCost(), - params->getLanes(), - params->getSegmentBlocks(), - batchSize, - bySegment, - precomputeRefs, - device->getDeviceIndex()), - bestLanesPerBlock(params->getLanes()), - bestJobsPerBlock(1){} - - -std::uint32_t ProcessingUnit::scanNonces( - const void* input, const std::uint32_t startNonce, - const std::uint64_t target) -{ - runner.init(input); - runner.fillFirstBlocks(startNonce); - runner.run(bestLanesPerBlock, bestJobsPerBlock); - runner.finalize(startNonce, target); - return runner.readResultNonce(); -} - -} // namespace cuda -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/cuda/processing-unit.h b/src/crypto/argon2gpu/cuda/processing-unit.h deleted file mode 100644 index fe25755b81..0000000000 --- a/src/crypto/argon2gpu/cuda/processing-unit.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2018-2021 Ehsan Dalvand - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_CUDA_PROCESSINGUNIT_H -#define ARGON2_CUDA_PROCESSINGUNIT_H - -#include - -#include "crypto/argon2gpu/common.h" -#include "crypto/argon2gpu/cuda/program-context.h" -#include "crypto/argon2gpu/cuda/kernels.h" - -namespace argon2gpu -{ -namespace cuda -{ -class ProcessingUnit -{ -private: - const ProgramContext* programContext; - const Argon2Params* params; - const Device* device; - - KernelRunner runner; - std::uint32_t bestLanesPerBlock; - std::uint32_t bestJobsPerBlock; - -public: - std::size_t getBatchSize() const { return runner.getBatchSize(); } - - ProcessingUnit( - const ProgramContext* programContext, - const Argon2Params* params, - const Device* device, - std::size_t batchSize, - bool bySegment = true, - bool precomputeRefs = false); - - std::uint32_t scanNonces( - const void* input, const std::uint32_t startNonce, - const std::uint64_t target); -}; - -} // namespace cuda -} // namespace argon2gpu - -#endif // ARGON2_CUDA_PROCESSINGUNIT_H diff --git a/src/crypto/argon2gpu/cuda/program-context.cpp b/src/crypto/argon2gpu/cuda/program-context.cpp deleted file mode 100644 index c32e4dc2ce..0000000000 --- a/src/crypto/argon2gpu/cuda/program-context.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/cuda/program-context.h" -#include "crypto/argon2gpu/cuda/kernels.h" - -#define THREADS_PER_LANE 32 - -namespace argon2gpu -{ -namespace cuda -{ -ProgramContext::ProgramContext( - const GlobalContext* globalContext, - const std::vector& devices, - Type type, - Version version) - : globalContext(globalContext), type(type), version(version) -{ -} - -} // namespace cuda -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/cuda/program-context.h b/src/crypto/argon2gpu/cuda/program-context.h deleted file mode 100644 index 0a9d4fcc6b..0000000000 --- a/src/crypto/argon2gpu/cuda/program-context.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_CUDA_PROGRAMCONTEXT_H -#define ARGON2_CUDA_PROGRAMCONTEXT_H - -#include "crypto/argon2gpu/common.h" -#include "crypto/argon2gpu/cuda/global-context.h" - -namespace argon2gpu -{ -namespace cuda -{ - -class ProgramContext -{ -private: - const GlobalContext* globalContext; - - Type type; - Version version; - -public: - const GlobalContext* getGlobalContext() const { return globalContext; } - - Type getArgon2Type() const { return type; } - Version getArgon2Version() const { return version; } - - ProgramContext( - const GlobalContext* globalContext, - const std::vector& devices, - Type type, - Version version); -}; - -} // namespace cuda -} // namespace argon2gpu - -#endif // ARGON2_CUDA_PROGRAMCONTEXT_H diff --git a/src/crypto/argon2gpu/opencl/cl.hpp b/src/crypto/argon2gpu/opencl/cl.hpp deleted file mode 100644 index 4d5a646ec3..0000000000 --- a/src/crypto/argon2gpu/opencl/cl.hpp +++ /dev/null @@ -1,13089 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2015 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and - * OpenCL 1.2 (rev 15) - * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes - * - * Additions and fixes from: - * Brian Cole, March 3rd 2010 and April 2012 - * Matt Gruenke, April 2012. - * Bruce Merry, February 2013. - * Tom Deakin and Simon McIntosh-Smith, July 2013 - * - * \version 1.2.8 - * \date October 2015 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.2 (revision 09) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 - -#include - -#if defined(USE_DX_INTEROP) -#include -#include -#endif -#endif // _WIN32 - -#if defined(_MSC_VER) -#include -#endif // _MSC_VER - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#else -#include -#endif // !__APPLE__ - -#if (_MSC_VER >= 1700) || (__cplusplus >= 201103L) -#define CL_HPP_RVALUE_REFERENCES_SUPPORTED -#define CL_HPP_CPP11_ATOMICS_SUPPORTED -#include -#endif - -#if (__cplusplus >= 201103L) -#define CL_HPP_NOEXCEPT noexcept -#else -#define CL_HPP_NOEXCEPT -#endif - -// To avoid accidentally taking ownership of core OpenCL types -// such as cl_kernel constructors are made explicit -// under OpenCL 1.2 -#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS explicit -#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -// Define deprecated prefixes and suffixes to ensure compilation -// in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include -#include -#include - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -#endif // #if defined(__CL_ENABLE_EXCEPTIONS) - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(__ANDROID__) || defined(linux) || defined(__APPLE__) || defined(__MACOSX) -#include -#endif // linux - -#include - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl -{ - -class Memory; - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __INIT_CL_EXT_FCN_PTR(name) \ - if (!pfn_##name) \ - { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if (!pfn_##name) \ - { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) -#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ - if (!pfn_##name) \ - { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - if (!pfn_##name) \ - { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; -class Buffer; - -#if defined(__CL_ENABLE_EXCEPTIONS) -/*! \brief Exception class - * - * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. - */ -class Error : public std::exception -{ - private: - cl_int err_; - const char *errStr_; - - public: - /*! \brief Create a new CL error exception for a given error code - * and corresponding message. - * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). - */ - Error(cl_int err, const char *errStr = NULL) : err_(err), errStr_(errStr) - { - } - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char *what() const throw() - { - if (errStr_ == NULL) - { - return "empty"; - } - else - { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - -namespace detail -{ -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler( - cl_int err, - const char *errStr = NULL) -{ - if (err != CL_SUCCESS) - { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler(cl_int err, const char *errStr = NULL) -{ - (void)errStr; // suppress unused variable warning - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS -} // namespace detail - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#if defined(CL_VERSION_1_2) -#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) -#endif // #if defined(CL_VERSION_1_2) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __COPY_ERR __ERR_STR(cl::copy) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#if defined(CL_VERSION_1_2) -#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#if defined(CL_VERSION_1_2) -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) -#endif // #if defined(CL_VERSION_1_2) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#if defined(CL_VERSION_1_2) -#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) -#define __LINK_PROGRAM_ERR __ERR_STR(clLinkProgram) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) -#endif // #if defined(CL_VERSION_1_2) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - -#define __RETAIN_ERR __ERR_STR(Retain Object) -#define __RELEASE_ERR __ERR_STR(Release Object) -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) -#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) - -/** - * CL 1.2 version that uses device fission. - */ -#if defined(CL_VERSION_1_2) -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) -#else -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // #if defined(CL_VERSION_1_2) - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#endif // #if defined(CL_VERSION_1_1) - -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/** - * CL 1.2 marker and barrier commands - */ -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) -#endif // #if defined(CL_VERSION_1_2) - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - - * \note Deprecated. Please use std::string as default or - * re-define the string class to match the std::string - * interface by defining STRING_CLASS - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -{ - private: - ::size_t size_; - char *str_; - - public: - //! \brief Constructs an empty string, allocating no memory. - string(void) : size_(0), str_(NULL) - { - } - - /*! \brief Constructs a string populated from an arbitrary value of - * specified size. - * - * An extra '\0' is added, in case none was contained in str. - * - * \param str the initial value of the string instance. Note that '\0' - * characters receive no special treatment. If NULL, - * the string is left empty, with a size of 0. - * - * \param size the number of characters to copy from str. - */ - string(const char *str, ::size_t size) : size_(size), - str_(NULL) - { - if (size > 0) - { - str_ = new char[size_ + 1]; - if (str_ != NULL) - { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else - { - size_ = 0; - } - } - } - - /*! \brief Constructs a string populated from a null-terminated value. - * - * \param str the null-terminated initial value of the string instance. - * If NULL, the string is left empty, with a size of 0. - */ - string(const char *str) : size_(0), - str_(NULL) - { - if (str) - { - size_ = ::strlen(str); - } - if (size_ > 0) - { - str_ = new char[size_ + 1]; - if (str_ != NULL) - { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - } - } - - void resize(::size_t n) - { - if (size_ == n) - { - return; - } - if (n == 0) - { - if (str_) - { - delete[] str_; - } - str_ = NULL; - size_ = 0; - } - else - { - char *newString = new char[n + 1]; - ::size_t copySize = n; - if (size_ < n) - { - copySize = size_; - } - size_ = n; - - if (str_) - { - memcpy(newString, str_, (copySize + 1) * sizeof(char)); - } - if (copySize < size_) - { - memset(newString + copySize, 0, size_ - copySize); - } - newString[size_] = '\0'; - - delete[] str_; - str_ = newString; - } - } - - const char &operator[](::size_t pos) const - { - return str_[pos]; - } - - char &operator[](::size_t pos) - { - return str_[pos]; - } - - /*! \brief Copies the value of another string to this one. - * - * \param rhs the string to copy. - * - * \returns a reference to the modified instance. - */ - string &operator=(const string &rhs) - { - if (this == &rhs) - { - return *this; - } - - if (str_ != NULL) - { - delete[] str_; - str_ = NULL; - size_ = 0; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) - { - str_ = NULL; - size_ = 0; - } - else - { - str_ = new char[rhs.size_ + 1]; - size_ = rhs.size_; - - if (str_ != NULL) - { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else - { - size_ = 0; - } - } - - return *this; - } - - /*! \brief Constructs a string by copying the value of another instance. - * - * \param rhs the string to copy. - */ - string(const string &rhs) : size_(0), - str_(NULL) - { - *this = rhs; - } - - //! \brief Destructor - frees memory used to hold the current value. - ~string() - { - delete[] str_; - str_ = NULL; - } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t size(void) const { return size_; } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t length(void) const { return size(); } - - /*! \brief Returns a pointer to the private copy held by this instance, - * or "" if empty/unset. - */ - const char *c_str(void) const { return (str_) ? str_ : ""; } -}; -typedef cl::string STRING_CLASS; -#endif // #elif !defined(__USE_DEV_STRING) - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * - * \note Deprecated. Please use std::vector as default or - * re-define the vector class to match the std::vector - * interface by defining VECTOR_CLASS - - * \note Not recommended for use with custom objects as - * current implementation will construct N elements - * - * std::vector functionality. - * \brief Fixed sized vector compatible with std::vector. - * - * \note - * This differs from std::vector<> not just in memory allocation, - * but also in terms of when members are constructed, destroyed, - * and assigned instead of being copy constructed. - * - * \param T type of element contained in the vector. - * - * \param N maximum size of the vector. - */ -template -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector -{ - private: - T data_[N]; - unsigned int size_; - - public: - //! \brief Constructs an empty vector with no memory allocated. - vector() : size_(static_cast(0)) - { - } - - //! \brief Deallocates the vector's memory and destroys all of its elements. - ~vector() - { - clear(); - } - - //! \brief Returns the number of elements currently contained. - unsigned int size(void) const - { - return size_; - } - - /*! \brief Empties the vector of all elements. - * \note - * This does not deallocate memory but will invoke destructors - * on contained elements. - */ - void clear() - { - while (!empty()) - { - pop_back(); - } - } - - /*! \brief Appends an element after the last valid element. - * Calling this on a vector that has reached capacity will throw an - * exception if exceptions are enabled. - */ - void push_back(const T &x) - { - if (size() < N) - { - new (&data_[size_]) T(x); - size_++; - } - else - { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Removes the last valid element from the vector. - * Calling this on an empty vector will throw an exception - * if exceptions are enabled. - */ - void pop_back(void) - { - if (size_ != 0) - { - --size_; - data_[size_].~T(); - } - else - { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Constructs with a value copied from another. - * - * \param vec the vector to copy. - */ - vector(const vector &vec) : size_(vec.size_) - { - if (size_ != 0) - { - assign(vec.begin(), vec.end()); - } - } - - /*! \brief Constructs with a specified number of initial elements. - * - * \param size number of initial elements. - * - * \param val value of initial elements. - */ - vector(unsigned int size, const T &val = T()) : size_(0) - { - for (unsigned int i = 0; i < size; i++) - { - push_back(val); - } - } - - /*! \brief Overwrites the current content with that copied from another - * instance. - * - * \param rhs vector to copy. - * - * \returns a reference to this. - */ - vector &operator=(const vector &rhs) - { - if (this == &rhs) - { - return *this; - } - - if (rhs.size_ != 0) - { - assign(rhs.begin(), rhs.end()); - } - else - { - clear(); - } - - return *this; - } - - /*! \brief Tests equality against another instance. - * - * \param vec the vector against which to compare. - */ - bool operator==(vector &vec) - { - if (size() != vec.size()) - { - return false; - } - - for (unsigned int i = 0; i < size(); ++i) - { - if (operator[](i) != vec[i]) - { - return false; - } - } - return true; - } - - //! \brief Conversion operator to T*. - operator T *() { return data_; } - - //! \brief Conversion operator to const T*. - operator const T *() const { return data_; } - - //! \brief Tests whether this instance has any elements. - bool empty(void) const - { - return size_ == 0; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int max_size(void) const - { - return N; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int capacity() const - { - return N; - } - - //! \brief Resizes the vector to the given size - void resize(unsigned int newSize, T fill = T()) - { - if (newSize > N) - { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - else - { - while (size_ < newSize) - { - new (&data_[size_]) T(fill); - size_++; - } - while (size_ > newSize) - { - --size_; - data_[size_].~T(); - } - } - } - - /*! \brief Returns a reference to a given element. - * - * \param index which element to access. * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - T &operator[](int index) - { - return data_[index]; - } - - /*! \brief Returns a const reference to a given element. - * - * \param index which element to access. - * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - const T &operator[](int index) const - { - return data_[index]; - } - - /*! \brief Assigns elements of the vector based on a source iterator range. - * - * \param start Beginning iterator of source range - * \param end Enditerator of source range - * - * \note - * Will throw an exception if exceptions are enabled and size exceeded. - */ - template - void assign(I start, I end) - { - clear(); - while (start != end) - { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Const iterator class for vectors - */ - class iterator - { - private: - const vector *vec_; - int index_; - - /** - * Internal iterator constructor to capture reference - * to the vector it iterates over rather than taking - * the vector by copy. - */ - iterator(const vector &vec, int index) : vec_(&vec) - { - if (!vec.empty()) - { - index_ = index; - } - else - { - index_ = -1; - } - } - - public: - iterator(void) : index_(-1), - vec_(NULL) - { - } - - iterator(const iterator &rhs) : vec_(rhs.vec_), - index_(rhs.index_) - { - } - - ~iterator(void) {} - - static iterator begin(const cl::vector &vec) - { - iterator i(vec, 0); - - return i; - } - - static iterator end(const cl::vector &vec) - { - iterator i(vec, vec.size()); - - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_)); - } - - bool operator!=(iterator i) - { - return (!(*this == i)); - } - - iterator &operator++() - { - ++index_; - return *this; - } - - iterator operator++(int) - { - iterator retVal(*this); - ++index_; - return retVal; - } - - iterator &operator--() - { - --index_; - return *this; - } - - iterator operator--(int) - { - iterator retVal(*this); - --index_; - return retVal; - } - - const T &operator*() const - { - return (*vec_)[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator begin(void) const - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - iterator end(void) const - { - return iterator::end(*this); - } - - T &front(void) - { - return data_[0]; - } - - T &back(void) - { - return data_[size_]; - } - - const T &front(void) const - { - return data_[0]; - } - - const T &back(void) const - { - return data_[size_ - 1]; - } -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) - -namespace detail -{ -#define __DEFAULT_NOT_INITIALIZED 1 -#define __DEFAULT_BEING_INITIALIZED 2 -#define __DEFAULT_INITIALIZED 4 - -/* - * Compare and exchange primitives are needed for handling of defaults - */ - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -inline int compare_exchange(std::atomic *dest, int exchange, int comparand) -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -inline int compare_exchange(volatile int *dest, int exchange, int comparand) -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -{ -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_compare_exchange_strong(dest, &comparand, exchange); - return comparand; -#elif _MSC_VER - return (int)(_InterlockedCompareExchange( - (volatile long *)dest, - (long)exchange, - (long)comparand)); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - return (__sync_val_compare_and_swap( - dest, - comparand, - exchange)); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -} - -inline void fence() -{ -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_thread_fence(std::memory_order_seq_cst); -#elif _MSC_VER // !CL_HPP_CPP11_ATOMICS_SUPPORTED - _ReadWriteBarrier(); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - __sync_synchronize(); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -} -} // namespace detail - -/*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ -template -class size_t -{ - private: - ::size_t data_[N]; - - public: - //! \brief Initialize size_t to all 0s - size_t() - { - for (int i = 0; i < N; ++i) - { - data_[i] = 0; - } - } - - ::size_t &operator[](int index) - { - return data_[index]; - } - - const ::size_t &operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator ::size_t *() { return data_; } - - //! \brief Conversion operator to const T*. - operator const ::size_t *() const { return data_; } -}; - -namespace detail -{ - -// Generic getInfoHelper. The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. -template -inline cl_int getInfoHelper(Functor f, cl_uint name, T *param, long) -{ - return f(name, sizeof(T), param, NULL); -} - -// Specialized getInfoHelper for VECTOR_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS *param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) - { - return err; - } - - T *value = (T *)alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) - { - return err; - } - - param->assign(&value[0], &value[required / sizeof(T)]); - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS *param, int, typename T::cl_type = 0) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) - { - return err; - } - - typename T::cl_type *value = (typename T::cl_type *)alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) - { - return err; - } - - ::size_t elements = required / sizeof(typename T::cl_type); - param->assign(&value[0], &value[elements]); - for (::size_t i = 0; i < elements; i++) - { - if (value[i] != NULL) - { - err = (*param)[i].retain(); - if (err != CL_SUCCESS) - { - return err; - } - } - } - return CL_SUCCESS; -} - -// Specialized for getInfo -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS *param, int) -{ - cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - - if (err != CL_SUCCESS) - { - return err; - } - - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for STRING_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS *param, long) -{ -#if defined(__NO_STD_VECTOR) || defined(__NO_STD_STRING) - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) - { - return err; - } - - char *value = (char *)alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) - { - return err; - } - - *param = value; - return CL_SUCCESS; -#else - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) - { - return err; - } - - // std::string has a constant data member - // a char vector does not - VECTOR_CLASS value(required); - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) - { - return err; - } - if (param) - { - param->assign(value.begin(), value.end()); - } -#endif - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for cl::size_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, size_t *param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) - { - return err; - } - - ::size_t *value = (::size_t *)alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) - { - return err; - } - - for (int i = 0; i < N; ++i) - { - (*param)[i] = value[i]; - } - - return CL_SUCCESS; -} - -template -struct ReferenceHandler; - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T *param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, NULL); - if (err != CL_SUCCESS) - { - return err; - } - *param = value; - if (value != NULL) - { - err = param->retain(); - if (err != CL_SUCCESS) - { - return err; - } - } - return CL_SUCCESS; -} - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS<::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void *) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS<::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(CL_VERSION_1_2) -#define __PARAM_NAME_INFO_1_2(F) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ - \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ - \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ - \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ - \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ - F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) -#endif // #if defined(CL_VERSION_1_2) - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits -{ -}; - -#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ - struct token; \ - template <> \ - struct param_traits \ - { \ - enum \ - { \ - value = param_name \ - }; \ - typedef T param_type; \ - }; - -__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 -#if defined(CL_VERSION_1_2) -__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#ifdef CL_PLATFORM_ICD_SUFFIX_KHR -__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) -#endif - -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) -#endif - -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS<::size_t>) -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) -#endif - -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) -#endif -#ifdef CL_DEVICE_WARP_SIZE_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) -#endif -#ifdef CL_DEVICE_GPU_OVERLAP_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) -#endif -#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) -#endif -#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) -#endif - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T *param) -{ - return getInfoHelper(f, name, param, 0); -} - -template -struct GetInfoFunctor0 -{ - Func f_; - const Arg0 &arg0_; - cl_int operator()( - cl_uint param, ::size_t size, void *value, ::size_t *size_ret) - { - return f_(arg0_, param, size, value, size_ret); - } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; - const Arg0 &arg0_; - const Arg1 &arg1_; - cl_int operator()( - cl_uint param, ::size_t size, void *value, ::size_t *size_ret) - { - return f_(arg0_, arg1_, param, size, value, size_ret); - } -}; - -template -inline cl_int -getInfo(Func f, const Arg0 &arg0, cl_uint name, T *param) -{ - GetInfoFunctor0 f0 = {f, arg0}; - return getInfoHelper(f0, name, param, 0); -} - -template -inline cl_int -getInfo(Func f, const Arg0 &arg0, const Arg1 &arg1, cl_uint name, T *param) -{ - GetInfoFunctor1 f0 = {f, arg0, arg1}; - return getInfoHelper(f0, name, param, 0); -} - -template -struct ReferenceHandler -{ -}; - -#if defined(CL_VERSION_1_2) -/** - * OpenCL 1.2 devices do have retain/release. - */ -template <> -struct ReferenceHandler -{ - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int retain(cl_device_id device) - { - return ::clRetainDevice(device); - } - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int release(cl_device_id device) - { - return ::clReleaseDevice(device); - } -}; -#else // #if defined(CL_VERSION_1_2) -/** - * OpenCL 1.1 devices do not have retain/release. - */ -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { - return CL_SUCCESS; - } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { - return CL_SUCCESS; - } -}; -#endif // #if defined(CL_VERSION_1_2) - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { - return CL_SUCCESS; - } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { - return CL_SUCCESS; - } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { - return ::clRetainContext(context); - } - static cl_int release(cl_context context) - { - return ::clReleaseContext(context); - } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { - return ::clRetainCommandQueue(queue); - } - static cl_int release(cl_command_queue queue) - { - return ::clReleaseCommandQueue(queue); - } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { - return ::clRetainMemObject(memory); - } - static cl_int release(cl_mem memory) - { - return ::clReleaseMemObject(memory); - } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { - return ::clRetainSampler(sampler); - } - static cl_int release(cl_sampler sampler) - { - return ::clReleaseSampler(sampler); - } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { - return ::clRetainProgram(program); - } - static cl_int release(cl_program program) - { - return ::clReleaseProgram(program); - } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { - return ::clRetainKernel(kernel); - } - static cl_int release(cl_kernel kernel) - { - return ::clReleaseKernel(kernel); - } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { - return ::clRetainEvent(event); - } - static cl_int release(cl_event event) - { - return ::clReleaseEvent(event); - } -}; - -// Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const char *versionInfo) -{ - int highVersion = 0; - int lowVersion = 0; - int index = 7; - while (versionInfo[index] != '.') - { - highVersion *= 10; - highVersion += versionInfo[index] - '0'; - ++index; - } - ++index; - while (versionInfo[index] != ' ' && versionInfo[index] != '\0') - { - lowVersion *= 10; - lowVersion += versionInfo[index] - '0'; - ++index; - } - return (highVersion << 16) | lowVersion; -} - -static cl_uint getPlatformVersion(cl_platform_id platform) -{ - ::size_t size = 0; - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); - char *versionInfo = (char *)alloca(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); - return getVersion(versionInfo); -} - -static cl_uint getDevicePlatformVersion(cl_device_id device) -{ - cl_platform_id platform; - clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); - return getPlatformVersion(platform); -} - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -static cl_uint getContextPlatformVersion(cl_context context) -{ - // The platform cannot be queried directly, so we first have to grab a - // device and obtain its context - ::size_t size = 0; - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - if (size == 0) - return 0; - cl_device_id *devices = (cl_device_id *)alloca(size); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); - return getDevicePlatformVersion(devices[0]); -} -#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -template -class Wrapper -{ - public: - typedef T cl_type; - - protected: - cl_type object_; - - public: - Wrapper() : object_(NULL) {} - - Wrapper(const cl_type &obj) : object_(obj) {} - - ~Wrapper() - { - if (object_ != NULL) - { - release(); - } - } - - Wrapper(const Wrapper &rhs) - { - object_ = rhs.object_; - if (object_ != NULL) - { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper(Wrapper &&rhs) CL_HPP_NOEXCEPT - { - object_ = rhs.object_; - rhs.object_ = NULL; - } -#endif - - Wrapper &operator=(const Wrapper &rhs) - { - if (this != &rhs) - { - if (object_ != NULL) - { - detail::errHandler(release(), __RELEASE_ERR); - } - object_ = rhs.object_; - if (object_ != NULL) - { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper &operator=(Wrapper &&rhs) - { - if (this != &rhs) - { - if (object_ != NULL) - { - detail::errHandler(release(), __RELEASE_ERR); - } - object_ = rhs.object_; - rhs.object_ = NULL; - } - return *this; - } -#endif - - Wrapper &operator=(const cl_type &rhs) - { - if (object_ != NULL) - { - detail::errHandler(release(), __RELEASE_ERR); - } - object_ = rhs; - return *this; - } - - cl_type operator()() const { return object_; } - - cl_type &operator()() { return object_; } - - protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U *, int, typename U::cl_type); - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -template <> -class Wrapper -{ - public: - typedef cl_device_id cl_type; - - protected: - cl_type object_; - bool referenceCountable_; - - static bool isReferenceCountable(cl_device_id device) - { - bool retVal = false; - if (device != NULL) - { - int version = getDevicePlatformVersion(device); - if (version > ((1 << 16) + 1)) - { - retVal = true; - } - } - return retVal; - } - - public: - Wrapper() : object_(NULL), referenceCountable_(false) - { - } - - Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) - { - referenceCountable_ = isReferenceCountable(obj); - } - - ~Wrapper() - { - if (object_ != NULL) - { - release(); - } - } - - Wrapper(const Wrapper &rhs) - { - object_ = rhs.object_; - referenceCountable_ = isReferenceCountable(object_); - if (object_ != NULL) - { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper(Wrapper &&rhs) CL_HPP_NOEXCEPT - { - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } -#endif - - Wrapper &operator=(const Wrapper &rhs) - { - if (this != &rhs) - { - if (object_ != NULL) - { - detail::errHandler(release(), __RELEASE_ERR); - } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - if (object_ != NULL) - { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper &operator=(Wrapper &&rhs) - { - if (this != &rhs) - { - if (object_ != NULL) - { - detail::errHandler(release(), __RELEASE_ERR); - } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } - return *this; - } -#endif - - Wrapper &operator=(const cl_type &rhs) - { - if (object_ != NULL) - { - detail::errHandler(release(), __RELEASE_ERR); - } - object_ = rhs; - referenceCountable_ = isReferenceCountable(object_); - return *this; - } - - cl_type operator()() const { return object_; } - - cl_type &operator()() { return object_; } - - protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U *, int, typename U::cl_type); - - template - friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS *, int, typename U::cl_type); - - cl_int retain() const - { - if (referenceCountable_) - { - return ReferenceHandler::retain(object_); - } - else - { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if (referenceCountable_) - { - return ReferenceHandler::release(object_); - } - else - { - return CL_SUCCESS; - } - } -}; - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief Adds constructors and member functions for cl_image_format. - * - * \see cl_image_format - */ -struct ImageFormat : public cl_image_format -{ - //! \brief Default constructor - performs no initialization. - ImageFormat() {} - - //! \brief Initializing constructor. - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - //! \brief Assignment operator. - ImageFormat &operator=(const ImageFormat &rhs) - { - if (this != &rhs) - { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \brief Class interface for cl_device_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_device_id - */ -class Device : public detail::Wrapper -{ - public: - //! \brief Default constructor - initializes to NULL. - Device() : detail::Wrapper() {} - - /*! \brief Constructor from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - __CL_EXPLICIT_CONSTRUCTORS Device(const cl_device_id &device) : detail::Wrapper(device) {} - - /*! \brief Returns the first device on the default context. - * - * \see Context::getDefault() - */ - static Device getDefault(cl_int *err = NULL); - - /*! \brief Assignment operator from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device &operator=(const cl_device_id &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device(const Device &dev) : detail::Wrapper(dev) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device &operator=(const Device &dev) - { - detail::Wrapper::operator=(dev); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Device(Device &&dev) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(dev)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Device &operator=(Device &&dev) - { - detail::Wrapper::operator=(std::move(dev)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetDeviceInfo(). - template - cl_int getInfo(cl_device_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - //! \brief Wrapper for clGetDeviceInfo() that returns by value. - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - /** - * CL 1.2 version - */ -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clCreateSubDevicesEXT(). - cl_int createSubDevices( - const cl_device_partition_property *properties, - VECTOR_CLASS *devices) - { - cl_uint n = 0; - cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id *ids = (cl_device_id *)alloca(n * sizeof(cl_device_id)); - err = clCreateSubDevices(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(CL_VERSION_1_2) - -/** - * CL 1.1 version that uses device fission. - */ -#if defined(CL_VERSION_1_1) -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext *properties, - VECTOR_CLASS *devices) - { - typedef CL_API_ENTRY cl_int(CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id *ids = (cl_device_id *)alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(USE_CL_DEVICE_FISSION) -#endif // #if defined(CL_VERSION_1_1) -}; - -/*! \brief Class interface for cl_platform_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_platform_id - */ -class Platform : public detail::Wrapper -{ - public: - //! \brief Default constructor - initializes to NULL. - Platform() : detail::Wrapper() {} - - /*! \brief Constructor from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - __CL_EXPLICIT_CONSTRUCTORS Platform(const cl_platform_id &platform) : detail::Wrapper(platform) {} - - /*! \brief Assignment operator from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform &operator=(const cl_platform_id &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetPlatformInfo(). - cl_int getInfo(cl_platform_info name, STRING_CLASS *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - //! \brief Wrapper for clGetPlatformInfo() that returns by value. - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - /*! \brief Gets a list of devices for this platform. - * - * Wraps clGetDeviceIDs(). - */ - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS *devices) const - { - cl_uint n = 0; - if (devices == NULL) - { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id *ids = (cl_device_id *)alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS *devices) const - { - typedef CL_API_ENTRY cl_int(CL_API_CALL * PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices); - - if (devices == NULL) - { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id *ids = (cl_device_id *)alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - /*! \brief Gets a list of available platforms. - * - * Wraps clGetPlatformIDs(). - */ - static cl_int get( - VECTOR_CLASS *platforms) - { - cl_uint n = 0; - - if (platforms == NULL) - { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id *ids = (cl_platform_id *)alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static cl_int get( - Platform *platform) - { - cl_uint n = 0; - - if (platform == NULL) - { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id *ids = (cl_platform_id *)alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - *platform = ids[0]; - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform, returning it by value. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static Platform get( - cl_int *errResult = NULL) - { - Platform platform; - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) - { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) - { - *errResult = err; - } - return Platform(); - } - - cl_platform_id *ids = (cl_platform_id *)alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - - if (err != CL_SUCCESS) - { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) - { - *errResult = err; - } - return Platform(); - } - - return Platform(ids[0]); - } - - static Platform getDefault( - cl_int *errResult = NULL) - { - return get(errResult); - } - -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clUnloadCompiler(). - cl_int - unloadCompiler() - { - return ::clUnloadPlatformCompiler(object_); - } -#endif // #if defined(CL_VERSION_1_2) -}; // class Platform - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -/** - * Unload the OpenCL compiler. - * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} -#endif // #if defined(CL_VERSION_1_1) - -/*! \brief Class interface for cl_context. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_context as the original. For details, see - * clRetainContext() and clReleaseContext(). - * - * \see cl_context - */ -class Context - : public detail::Wrapper -{ - private: -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static Context default_; - static volatile cl_int default_error_; - - public: - /*! \brief Constructs a context including a list of specified devices. - * - * Wraps clCreateContext(). - */ - Context( - const VECTOR_CLASS &devices, - cl_context_properties *properties = NULL, - void(CL_CALLBACK *notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void *data = NULL, - cl_int *err = NULL) - { - cl_int error; - - ::size_t numDevices = devices.size(); - cl_device_id *deviceIDs = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id)); - for (::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex) - { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateContext( - properties, (cl_uint)numDevices, - deviceIDs, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) - { - *err = error; - } - } - - Context( - const Device &device, - cl_context_properties *properties = NULL, - void(CL_CALLBACK *notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void *data = NULL, - cl_int *err = NULL) - { - cl_int error; - - cl_device_id deviceID = device(); - - object_ = ::clCreateContext( - properties, 1, - &deviceID, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) - { - *err = error; - } - } - - /*! \brief Constructs a context including all or a subset of devices of a specified type. - * - * Wraps clCreateContextFromType(). - */ - Context( - cl_device_type type, - cl_context_properties *properties = NULL, - void(CL_CALLBACK *notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void *data = NULL, - cl_int *err = NULL) - { - cl_int error; - -#if !defined(__APPLE__) && !defined(__MACOS) - cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0}; - - if (properties == NULL) - { - // Get a valid platform ID as we cannot send in a blank one - VECTOR_CLASS platforms; - error = Platform::get(&platforms); - if (error != CL_SUCCESS) - { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) - { - *err = error; - } - return; - } - - // Check the platforms we found for a device of our specified type - cl_context_properties platform_id = 0; - for (unsigned int i = 0; i < platforms.size(); i++) - { - - VECTOR_CLASS devices; - -#if defined(__CL_ENABLE_EXCEPTIONS) - try - { -#endif - - error = platforms[i].getDevices(type, &devices); - -#if defined(__CL_ENABLE_EXCEPTIONS) - } - catch (Error) - { - } - // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type - // We do error checking next anyway, and can throw there if needed -#endif - - // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) - { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) - { - *err = error; - } - } - - if (devices.size() > 0) - { - platform_id = (cl_context_properties)platforms[i](); - break; - } - } - - if (platform_id == 0) - { - detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) - { - *err = CL_DEVICE_NOT_FOUND; - } - return; - } - - prop[1] = platform_id; - properties = &prop[0]; - } -#endif - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) - { - *err = error; - } - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context(const Context &ctx) : detail::Wrapper(ctx) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context &operator=(const Context &ctx) - { - detail::Wrapper::operator=(ctx); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Context(Context &&ctx) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(ctx)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Context &operator=(Context &&ctx) - { - detail::Wrapper::operator=(std::move(ctx)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. - * - * \note All calls to this function return the same cl_context as the first. - */ - static Context getDefault(cl_int *err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) - { - if (err != NULL) - { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) - { - // Assume writes will propagate eventually... - while (default_initialized_ != __DEFAULT_INITIALIZED) - { - detail::fence(); - } - - if (err != NULL) - { - *err = default_error_; - } - return default_; - } - - cl_int error; - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - NULL, - NULL, - NULL, - &error); - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) - { - *err = default_error_; - } - return default_; - } - - //! \brief Default constructor - initializes to NULL. - Context() : detail::Wrapper() {} - - /*! \brief Constructor from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_context - * into the new Context object. - */ - __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context &context) : detail::Wrapper(context) {} - - /*! \brief Assignment operator from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseContext() on the value previously held by this instance. - */ - Context &operator=(const cl_context &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetContextInfo(). - template - cl_int getInfo(cl_context_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - //! \brief Wrapper for clGetContextInfo() that returns by value. - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - /*! \brief Gets a list of supported image formats. - * - * Wraps clGetSupportedImageFormats(). - */ - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS *formats) const - { - cl_uint numEntries; - - if (!formats) - { - return CL_SUCCESS; - } - - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - if (numEntries > 0) - { - ImageFormat *value = (ImageFormat *) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format *)value, - NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - } - else - { - formats->clear(); - } - return CL_SUCCESS; - } -}; - -inline Device Device::getDefault(cl_int *err) -{ - cl_int error; - Device device; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) - { - *err = error; - } - } - else - { - device = context.getInfo()[0]; - if (err != NULL) - { - *err = CL_SUCCESS; - } - } - - return device; -} - -#ifdef _WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) std::atomic Context::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) Context Context::default_; -__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; -#else // !_WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) std::atomic Context::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) Context Context::default_; -__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; -#endif // !_WIN32 - -/*! \brief Class interface for cl_event. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_event as the original. For details, see - * clRetainEvent() and clReleaseEvent(). - * - * \see cl_event - */ -class Event : public detail::Wrapper -{ - public: - //! \brief Default constructor - initializes to NULL. - Event() : detail::Wrapper() {} - - /*! \brief Constructor from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_event - * into the new Event object. - */ - __CL_EXPLICIT_CONSTRUCTORS Event(const cl_event &event) : detail::Wrapper(event) {} - - /*! \brief Assignment operator from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseEvent() on the value previously held by this instance. - */ - Event &operator=(const cl_event &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetEventInfo(). - template - cl_int getInfo(cl_event_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - //! \brief Wrapper for clGetEventInfo() that returns by value. - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - //! \brief Wrapper for clGetEventProfilingInfo(). - template - cl_int getProfilingInfo(cl_profiling_info name, T *param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. - template - typename detail::param_traits::param_type - getProfilingInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - /*! \brief Blocks the calling thread until this event completes. - * - * Wraps clWaitForEvents(). - */ - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a user callback function for a specific command execution status. - * - * Wraps clSetEventCallback(). - */ - cl_int setCallback( - cl_int type, - void(CL_CALLBACK *pfn_notify)(cl_event, cl_int, void *), - void *user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - /*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ - static cl_int - waitForEvents(const VECTOR_CLASS &events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint)events.size(), (events.size() > 0) ? (cl_event *)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); - } -}; - -#if defined(CL_VERSION_1_1) -/*! \brief Class interface for user events (a subset of cl_event's). - * - * See Event for details about copy semantics, etc. - */ -class UserEvent : public Event -{ - public: - /*! \brief Constructs a user event on a given context. - * - * Wraps clCreateUserEvent(). - */ - UserEvent( - const Context &context, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) - { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - UserEvent() : Event() {} - - /*! \brief Sets the execution status of a user event object. - * - * Wraps clSetUserEventStatus(). - */ - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_, status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -/*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ -inline static cl_int -WaitForEvents(const VECTOR_CLASS &events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint)events.size(), (events.size() > 0) ? (cl_event *)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \brief Class interface for cl_mem. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_mem as the original. For details, see - * clRetainMemObject() and clReleaseMemObject(). - * - * \see cl_mem - */ -class Memory : public detail::Wrapper -{ - public: - //! \brief Default constructor - initializes to NULL. - Memory() : detail::Wrapper() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_mem - * into the new Memory object. - */ - __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem &memory) : detail::Wrapper(memory) {} - - /*! \brief Assignment operator from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseMemObject() on the value previously held by this instance. - */ - Memory &operator=(const cl_mem &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Memory(const Memory &mem) : detail::Wrapper(mem) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Memory &operator=(const Memory &mem) - { - detail::Wrapper::operator=(mem); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Memory(Memory &&mem) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(mem)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Memory &operator=(Memory &&mem) - { - detail::Wrapper::operator=(std::move(mem)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_mem_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - //! \brief Wrapper for clGetMemObjectInfo() that returns by value. - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a callback function to be called when the memory object - * is no longer needed. - * - * Wraps clSetMemObjectDestructorCallback(). - * - * Repeated calls to this function, for a given cl_mem value, will append - * to the list of functions called (in reverse order) when memory object's - * resources are freed and the memory object is deleted. - * - * \note - * The registered callbacks are associated with the underlying cl_mem - * value - not the Memory class instance. - */ - cl_int setDestructorCallback( - void(CL_CALLBACK *pfn_notify)(cl_mem, void *), - void *user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif -}; - -// Pre-declare copy functions -class Buffer; -template -cl_int copy(IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer); -template -cl_int copy(const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator); -template -cl_int copy(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer); -template -cl_int copy(const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator); - -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory -{ - public: - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context &context, - cl_mem_flags flags, - ::size_t size, - void *host_ptr = NULL, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } - - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - ::size_t size, - void *host_ptr = NULL, - cl_int *err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template - Buffer( - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr = false, - cl_int *err = NULL) - { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) - { - flags |= CL_MEM_READ_ONLY; - } - else - { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) - { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType) * (endIterator - startIterator); - - Context context = Context::getDefault(err); - - if (useHostPtr) - { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } - else - { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - - if (!useHostPtr) - { - error = cl::copy(startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified context. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template - Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int *err = NULL); - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified queue. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template - Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int *err = NULL); - - //! \brief Default constructor - initializes to NULL. - Buffer() : Memory() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem &buffer) : Memory(buffer) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Buffer &operator=(const cl_mem &rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer(const Buffer &buf) : Memory(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer &operator=(const Buffer &buf) - { - Memory::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer(Buffer &&buf) CL_HPP_NOEXCEPT : Memory(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer &operator=(Buffer &&buf) - { - Memory::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - -#if defined(CL_VERSION_1_1) - /*! \brief Creates a new buffer object from this. - * - * Wraps clCreateSubBuffer(). - */ - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void *buffer_create_info, - cl_int *err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) - { - *err = error; - } - - return result; - } -#endif -}; - -#if defined(USE_DX_INTEROP) -/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. - * - * This is provided to facilitate interoperability with Direct3D. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferD3D10 : public Buffer -{ - public: - typedef CL_API_ENTRY cl_mem(CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer *buffer, - cl_int *errcode_ret); - - /*! \brief Constructs a BufferD3D10, in a specified context, from a - * given ID3D10Buffer. - * - * Wraps clCreateFromD3D10BufferKHR(). - */ - BufferD3D10( - const Context &context, - cl_mem_flags flags, - ID3D10Buffer *bufobj, - cl_int *err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - -#if defined(CL_VERSION_1_2) - vector props = context.getInfo(); - cl_platform platform = -1; - for (int i = 0; i < props.size(); ++i) - { - if (props[i] == CL_CONTEXT_PLATFORM) - { - platform = props[i + 1]; - } - } - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); -#endif - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferD3D10() : Buffer() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem &buffer) : Buffer(buffer) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10 &operator=(const cl_mem &rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(const BufferD3D10 &buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10 &operator=(const BufferD3D10 &buf) - { - Buffer::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(BufferD3D10 &&buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10 &operator=(BufferD3D10 &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif - -/*! \brief Class interface for GL Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferGL : public Buffer -{ - public: - /*! \brief Constructs a BufferGL in a specified context, from a given - * GL buffer. - * - * Wraps clCreateFromGLBuffer(). - */ - BufferGL( - const Context &context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferGL() : Buffer() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem &buffer) : Buffer(buffer) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL &operator=(const cl_mem &rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL(const BufferGL &buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL &operator=(const BufferGL &buf) - { - Buffer::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL(BufferGL &&buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL &operator=(BufferGL &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint *gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_, type, gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief C++ base class for Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image : public Memory -{ - protected: - //! \brief Default constructor - initializes to NULL. - Image() : Memory() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem &image) : Memory(image) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image &operator=(const cl_mem &rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image(const Image &img) : Memory(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image &operator=(const Image &img) - { - Memory::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image(Image &&img) CL_HPP_NOEXCEPT : Memory(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image &operator=(Image &&img) - { - Memory::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - public: - //! \brief Wrapper for clGetImageInfo(). - template - cl_int getImageInfo(cl_image_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - //! \brief Wrapper for clGetImageInfo() that returns by value. - template - typename detail::param_traits::param_type - getImageInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } -}; - -#if defined(CL_VERSION_1_2) -/*! \brief Class interface for 1D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image1D : public Image -{ - public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image1D( - const Context &context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - void *host_ptr = NULL, - cl_int *err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0}; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) - { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image1D() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem &image1D) : Image(image1D) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image1D &operator=(const cl_mem &rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D(const Image1D &img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D &operator=(const Image1D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D(Image1D &&img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D &operator=(Image1D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -/*! \class Image1DBuffer - * \brief Image interface for 1D buffer images. - */ -class Image1DBuffer : public Image -{ - public: - Image1DBuffer( - const Context &context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - const Buffer &buffer, - cl_int *err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer()}; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - NULL, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) - { - *err = error; - } - } - - Image1DBuffer() {} - - __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem &image1D) : Image(image1D) {} - - Image1DBuffer &operator=(const cl_mem &rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(const Image1DBuffer &img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer &operator=(const Image1DBuffer &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(Image1DBuffer &&img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer &operator=(Image1DBuffer &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -/*! \class Image1DArray - * \brief Image interface for arrays of 1D images. - */ -class Image1DArray : public Image -{ - public: - Image1DArray( - const Context &context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t rowPitch, - void *host_ptr = NULL, - cl_int *err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0}; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) - { - *err = error; - } - } - - Image1DArray() {} - - __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem &imageArray) : Image(imageArray) {} - - Image1DArray &operator=(const cl_mem &rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(const Image1DArray &img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray &operator=(const Image1DArray &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(Image1DArray &&img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray &operator=(Image1DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for 2D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image2D : public Image -{ - public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image2D( - const Context &context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void *host_ptr = NULL, - cl_int *err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0}; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) - { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { -#if defined(MAC_OSX) -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - object_ = ::clCreateImage2D( - context(), flags, &format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) - { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image2D() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem &image2D) : Image(image2D) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2D &operator=(const cl_mem &rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2D(const Image2D &img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2D &operator=(const Image2D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2D(Image2D &&img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2D &operator=(Image2D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 2D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D -{ - public: - /*! \brief Constructs an Image2DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture2D(). - */ - Image2DGL( - const Context &context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); - if (err != NULL) - { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image2DGL() : Image2D() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem &image) : Image2D(image) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL &operator=(const cl_mem &rhs) - { - Image2D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(const Image2DGL &img) : Image2D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DGL &operator=(const Image2DGL &img) - { - Image2D::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(Image2DGL &&img) CL_HPP_NOEXCEPT : Image2D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL &operator=(Image2DGL &&img) - { - Image2D::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class Image2DArray - * \brief Image interface for arrays of 2D images. - */ -class Image2DArray : public Image -{ - public: - Image2DArray( - const Context &context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t height, - ::size_t rowPitch, - ::size_t slicePitch, - void *host_ptr = NULL, - cl_int *err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0}; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) - { - *err = error; - } - } - - Image2DArray() {} - - __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem &imageArray) : Image(imageArray) {} - - Image2DArray &operator=(const cl_mem &rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DArray(const Image2DArray &img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DArray &operator=(const Image2DArray &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray(Image2DArray &&img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray &operator=(Image2DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for 3D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3D : public Image -{ - public: - /*! \brief Constructs a 3D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image3D( - const Context &context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void *host_ptr = NULL, - cl_int *err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0}; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) - { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) - { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image3D() : Image() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem &image3D) : Image(image3D) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3D &operator=(const cl_mem &rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D(const Image3D &img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D &operator=(const Image3D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D(Image3D &&img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D &operator=(Image3D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 3D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3DGL : public Image3D -{ - public: - /*! \brief Constructs an Image3DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture3D(). - */ - Image3DGL( - const Context &context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); - if (err != NULL) - { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image3DGL() : Image3D() {} - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem &image) : Image3D(image) {} - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL &operator=(const cl_mem &rhs) - { - Image3D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(const Image3DGL &img) : Image3D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL &operator=(const Image3DGL &img) - { - Image3D::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(Image3DGL &&img) CL_HPP_NOEXCEPT : Image3D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3DGL &operator=(Image3DGL &&img) - { - Image3D::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class ImageGL - * \brief general image interface for GL interop. - * We abstract the 2D and 3D GL images into a single instance here - * that wraps all GL sourced images on the grounds that setup information - * was performed by OpenCL anyway. - */ -class ImageGL : public Image -{ - public: - ImageGL( - const Context &context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); - if (err != NULL) - { - *err = error; - } - } - - ImageGL() : Image() {} - - __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem &image) : Image(image) {} - - ImageGL &operator=(const cl_mem &rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL(const ImageGL &img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL &operator=(const ImageGL &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - ImageGL(ImageGL &&img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - ImageGL &operator=(ImageGL &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for GL Render Buffer Memory Objects. -* -* This is provided to facilitate interoperability with OpenGL. -* -* See Memory for details about copy semantics, etc. -* -* \see Memory -*/ -class BufferRenderGL : -#if defined(CL_VERSION_1_2) - public ImageGL -#else // #if defined(CL_VERSION_1_2) - public Image2DGL -#endif //#if defined(CL_VERSION_1_2) -{ - public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. - * - * Wraps clCreateFromGLRenderbuffer(). - */ - BufferRenderGL( - const Context &context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. -#if defined(CL_VERSION_1_2) - BufferRenderGL() : ImageGL(){}; -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL() : Image2DGL(){}; -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ -#if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem &buffer) : ImageGL(buffer) - { - } -#else // #if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem &buffer) : Image2DGL(buffer) - { - } -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL &operator=(const cl_mem &rhs) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) - - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL &buf) : ImageGL(buf) - { - } -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL &buf) : Image2DGL(buf) - { - } -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL &operator=(const BufferRenderGL &rhs) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL &&buf) CL_HPP_NOEXCEPT : ImageGL(std::move(buf)) - { - } -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL &&buf) CL_HPP_NOEXCEPT : Image2DGL(std::move(buf)) - { - } -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL &operator=(BufferRenderGL &&buf) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(std::move(buf)); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(std::move(buf)); -#endif //#if defined(CL_VERSION_1_2) - - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint *gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_, type, gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief Class interface for cl_sampler. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_sampler as the original. For details, see - * clRetainSampler() and clReleaseSampler(). - * - * \see cl_sampler - */ -class Sampler : public detail::Wrapper -{ - public: - //! \brief Default constructor - initializes to NULL. - Sampler() {} - - /*! \brief Constructs a Sampler in a specified context. - * - * Wraps clCreateSampler(). - */ - Sampler( - const Context &context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) - { - *err = error; - } - } - - /*! \brief Constructor from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_sampler - * into the new Sampler object. - */ - __CL_EXPLICIT_CONSTRUCTORS Sampler(const cl_sampler &sampler) : detail::Wrapper(sampler) {} - - /*! \brief Assignment operator from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseSampler() on the value previously held by this instance. - */ - Sampler &operator=(const cl_sampler &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler(const Sampler &sam) : detail::Wrapper(sam) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler &operator=(const Sampler &sam) - { - detail::Wrapper::operator=(sam); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler(Sampler &&sam) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(sam)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler &operator=(Sampler &&sam) - { - detail::Wrapper::operator=(std::move(sam)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetSamplerInfo(). - template - cl_int getInfo(cl_sampler_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - //! \brief Wrapper for clGetSamplerInfo() that returns by value. - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } -}; - -class Program; -class CommandQueue; -class Kernel; - -//! \brief Class interface for specifying NDRange values. -class NDRange -{ - private: - size_t<3> sizes_; - cl_uint dimensions_; - - public: - //! \brief Default constructor - resulting range has zero dimensions. - NDRange() - : dimensions_(0) - { - } - - //! \brief Constructs one-dimensional range. - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_[0] = size0; - } - - //! \brief Constructs two-dimensional range. - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_[0] = size0; - sizes_[1] = size1; - } - - //! \brief Constructs three-dimensional range. - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = size2; - } - - /*! \brief Conversion operator to const ::size_t *. - * - * \returns a pointer to the size of the first dimension. - */ - operator const ::size_t *() const - { - return (const ::size_t *)sizes_; - } - - //! \brief Queries the number of dimensions in the range. - ::size_t dimensions() const { return dimensions_; } -}; - -//! \brief A zero-dimensional range. -static const NDRange NullRange; - -//! \brief Local address wrapper for use with Kernel::setArg -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail -{ - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T &) { return sizeof(T); } - static const T *ptr(const T &value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg &value) { return value.size_; } - static const void *ptr(const LocalSpaceArg &) { return NULL; } -}; - -} // namespace detail -//! \endcond - -/*! __local - * \brief Helper function for generating LocalSpaceArg objects. - * Deprecated. Replaced with Local. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg -__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = {size}; - return ret; -} - -/*! Local - * \brief Helper function for generating LocalSpaceArg objects. - */ -inline LocalSpaceArg -Local(::size_t size) -{ - LocalSpaceArg ret = {size}; - return ret; -} - -//class KernelFunctor; - -/*! \brief Class interface for cl_kernel. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_kernel as the original. For details, see - * clRetainKernel() and clReleaseKernel(). - * - * \see cl_kernel - */ -class Kernel : public detail::Wrapper -{ - public: - inline Kernel(const Program &program, const char *name, cl_int *err = NULL); - - //! \brief Default constructor - initializes to NULL. - Kernel() {} - - /*! \brief Constructor from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_kernel - * into the new Kernel object. - */ - __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel &kernel) : detail::Wrapper(kernel) {} - - /*! \brief Assignment operator from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseKernel() on the value previously held by this instance. - */ - Kernel &operator=(const cl_kernel &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel(const Kernel &kernel) : detail::Wrapper(kernel) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel &operator=(const Kernel &kernel) - { - detail::Wrapper::operator=(kernel); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Kernel(Kernel &&kernel) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(kernel)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Kernel &operator=(Kernel &&kernel) - { - detail::Wrapper::operator=(std::move(kernel)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - template - cl_int getInfo(cl_kernel_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_2) - template - cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), - __GET_KERNEL_ARG_INFO_ERR); - } - - template - typename detail::param_traits::param_type - getArgInfo(cl_uint argIndex, cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_arg_info, name>::param_type param; - cl_int result = getArgInfo(argIndex, name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } -#endif // #if defined(CL_VERSION_1_2) - - template - cl_int getWorkGroupInfo( - const Device &device, cl_kernel_work_group_info name, T *param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template - typename detail::param_traits::param_type - getWorkGroupInfo(const Device &device, cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, const T &value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, const void *argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } -}; - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ - public: - typedef VECTOR_CLASS> Binaries; - typedef VECTOR_CLASS> Sources; - - Program( - const STRING_CLASS &source, - bool build = false, - cl_int *err = NULL) - { - cl_int error; - - const char *strings = source.c_str(); - const ::size_t length = source.size(); - - Context context = Context::getDefault(err); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) - { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) - { - *err = error; - } - } - - Program( - const Context &context, - const STRING_CLASS &source, - bool build = false, - cl_int *err = NULL) - { - cl_int error; - - const char *strings = source.c_str(); - const ::size_t length = source.size(); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) - { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) - { - *err = error; - } - } - - Program( - const Context &context, - const Sources &sources, - cl_int *err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t *lengths = (::size_t *)alloca(n * sizeof(::size_t)); - const char **strings = (const char **)alloca(n * sizeof(const char *)); - - for (::size_t i = 0; i < n; ++i) - { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) - { - *err = error; - } - } - - /** - * Construct a program object from a list of devices and a per-device list of binaries. - * \param context A valid OpenCL context in which to construct the program. - * \param devices A vector of OpenCL device objects for which the program will be created. - * \param binaries A vector of pairs of a pointer to a binary object and its length. - * \param binaryStatus An optional vector that on completion will be resized to - * match the size of binaries and filled with values to specify if each binary - * was successfully loaded. - * Set to CL_SUCCESS if the binary was successfully loaded. - * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. - * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. - * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: - * CL_INVALID_CONTEXT if context is not a valid context. - * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; - * or if any entry in binaries is NULL or has length 0. - * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. - * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. - */ - Program( - const Context &context, - const VECTOR_CLASS &devices, - const Binaries &binaries, - VECTOR_CLASS *binaryStatus = NULL, - cl_int *err = NULL) - { - cl_int error; - - const ::size_t numDevices = devices.size(); - - // Catch size mismatch early and return - if (binaries.size() != numDevices) - { - error = CL_INVALID_VALUE; - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) - { - *err = error; - } - return; - } - - ::size_t *lengths = (::size_t *)alloca(numDevices * sizeof(::size_t)); - const unsigned char **images = (const unsigned char **)alloca(numDevices * sizeof(const unsigned char **)); - - for (::size_t i = 0; i < numDevices; ++i) - { - images[i] = (const unsigned char *)binaries[i].first; - lengths[i] = binaries[(int)i].second; - } - - cl_device_id *deviceIDs = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id)); - for (::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex) - { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - if (binaryStatus) - { - binaryStatus->resize(numDevices); - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint)devices.size(), - deviceIDs, - lengths, images, (binaryStatus != NULL && numDevices > 0) ? &binaryStatus->front() : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) - { - *err = error; - } - } - -#if defined(CL_VERSION_1_2) - /** - * Create program using builtin kernels. - * \param kernelNames Semi-colon separated list of builtin kernel names - */ - Program( - const Context &context, - const VECTOR_CLASS &devices, - const STRING_CLASS &kernelNames, - cl_int *err = NULL) - { - cl_int error; - - ::size_t numDevices = devices.size(); - cl_device_id *deviceIDs = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id)); - for (::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex) - { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateProgramWithBuiltInKernels( - context(), - (cl_uint)devices.size(), - deviceIDs, - kernelNames.c_str(), - &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); - if (err != NULL) - { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) - - Program() - { - } - - __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program &program) : detail::Wrapper(program) {} - - Program &operator=(const cl_program &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Program(const Program &program) : detail::Wrapper(program) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Program &operator=(const Program &program) - { - detail::Wrapper::operator=(program); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Program(Program &&program) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(program)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Program &operator=(Program &&program) - { - detail::Wrapper::operator=(std::move(program)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - cl_int build( - const VECTOR_CLASS &devices, - const char *options = NULL, - void(CL_CALLBACK *notifyFptr)(cl_program, void *) = NULL, - void *data = NULL) const - { - ::size_t numDevices = devices.size(); - cl_device_id *deviceIDs = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id)); - for (::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex) - { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - cl_int build( - const char *options = NULL, - void(CL_CALLBACK *notifyFptr)(cl_program, void *) = NULL, - void *data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - 0, - NULL, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - -#if defined(CL_VERSION_1_2) - cl_int compile( - const char *options = NULL, - void(CL_CALLBACK *notifyFptr)(cl_program, void *) = NULL, - void *data = NULL) const - { - return detail::errHandler( - ::clCompileProgram( - object_, - 0, - NULL, - options, - 0, - NULL, - NULL, - notifyFptr, - data), - __COMPILE_PROGRAM_ERR); - } -#endif - - template - cl_int getInfo(cl_program_info name, T *param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device &device, cl_program_build_info name, T *param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template - typename detail::param_traits::param_type - getBuildInfo(const Device &device, cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS *kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel *value = (Kernel *)alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel *)value, NULL); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -#if defined(CL_VERSION_1_2) -inline Program linkProgram( - Program input1, - Program input2, - const char *options = NULL, - void(CL_CALLBACK *notifyFptr)(cl_program, void *) = NULL, - void *data = NULL, - cl_int *err = NULL) -{ - cl_int error_local = CL_SUCCESS; - - cl_program programs[2] = {input1(), input2()}; - - Context ctx = input1.getInfo(&error_local); - if (error_local != CL_SUCCESS) - { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - 2, - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local, __COMPILE_PROGRAM_ERR); - if (err != NULL) - { - *err = error_local; - } - - return Program(prog); -} - -inline Program linkProgram( - VECTOR_CLASS inputPrograms, - const char *options = NULL, - void(CL_CALLBACK *notifyFptr)(cl_program, void *) = NULL, - void *data = NULL, - cl_int *err = NULL) -{ - cl_int error_local = CL_SUCCESS; - - cl_program *programs = (cl_program *)alloca(inputPrograms.size() * sizeof(cl_program)); - - if (programs != NULL) - { - for (unsigned int i = 0; i < inputPrograms.size(); i++) - { - programs[i] = inputPrograms[i](); - } - } - - Context ctx; - if (inputPrograms.size() > 0) - { - ctx = inputPrograms[0].getInfo(&error_local); - if (error_local != CL_SUCCESS) - { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - } - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - (cl_uint)inputPrograms.size(), - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local, __COMPILE_PROGRAM_ERR); - if (err != NULL) - { - *err = error_local; - } - - return Program(prog); -} -#endif - -template <> -inline VECTOR_CLASS cl::Program::getInfo(cl_int *err) const -{ - VECTOR_CLASS<::size_t> sizes = getInfo(); - VECTOR_CLASS binaries; - for (VECTOR_CLASS<::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) - { - char *ptr = NULL; - if (*s != 0) - ptr = new char[*s]; - binaries.push_back(ptr); - } - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); - if (err != NULL) - { - *err = result; - } - return binaries; -} - -inline Kernel::Kernel(const Program &program, const char *name, cl_int *err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) - { - *err = error; - } -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ - private: -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static CommandQueue default_; - static volatile cl_int default_error_; - - public: - CommandQueue( - cl_command_queue_properties properties, - cl_int *err = NULL) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) - { - *err = error; - } - } - else - { - Device device = context.getInfo()[0]; - - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) - { - *err = error; - } - } - } - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - */ - explicit CommandQueue( - const Context &context, - cl_command_queue_properties properties = 0, - cl_int *err = NULL) - { - cl_int error; - VECTOR_CLASS devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) - { - *err = error; - } - return; - } - - object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (err != NULL) - { - *err = error; - } - } - - CommandQueue( - const Context &context, - const Device &device, - cl_command_queue_properties properties = 0, - cl_int *err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) - { - *err = error; - } - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(const CommandQueue &queue) : detail::Wrapper(queue) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue &operator=(const CommandQueue &queue) - { - detail::Wrapper::operator=(queue); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(CommandQueue &&queue) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(queue)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue &operator=(CommandQueue &&queue) - { - detail::Wrapper::operator=(std::move(queue)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - static CommandQueue getDefault(cl_int *err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) - { - if (err != NULL) - { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) - { - // Assume writes will propagate eventually... - while (default_initialized_ != __DEFAULT_INITIALIZED) - { - detail::fence(); - } - - if (err != NULL) - { - *err = default_error_; - } - return default_; - } - - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) - { - *err = error; - } - } - else - { - Device device = context.getInfo()[0]; - - default_ = CommandQueue(context, device, 0, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) - { - *err = error; - } - } - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) - { - *err = default_error_; - } - return default_; - } - - CommandQueue() {} - - __CL_EXPLICIT_CONSTRUCTORS CommandQueue(const cl_command_queue &commandQueue) : detail::Wrapper(commandQueue) {} - - CommandQueue &operator=(const cl_command_queue &rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T *param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template - typename detail::param_traits::param_type - getInfo(cl_int *err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) - { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer &buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBuffer( - const Buffer &buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBuffer( - const Buffer &src, - const Buffer &dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReadBufferRect( - const Buffer &buffer, - cl_bool blocking, - const size_t<3> &buffer_offset, - const size_t<3> &host_offset, - const size_t<3> ®ion, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBufferRect( - const Buffer &buffer, - cl_bool blocking, - const size_t<3> &buffer_offset, - const size_t<3> &host_offset, - const size_t<3> ®ion, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferRect( - const Buffer &src, - const Buffer &dst, - const size_t<3> &src_origin, - const size_t<3> &dst_origin, - const size_t<3> ®ion, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified a as vector. - * \tparam PatternType The datatype of the pattern field. - * The pattern type must be an accepted OpenCL data type. - */ - template - cl_int enqueueFillBuffer( - const Buffer &buffer, - PatternType pattern, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillBuffer( - object_, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueReadImage( - const Image &image, - cl_bool blocking, - const size_t<3> &origin, - const size_t<3> ®ion, - ::size_t row_pitch, - ::size_t slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *)origin, - (const ::size_t *)region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteImage( - const Image &image, - cl_bool blocking, - const size_t<3> &origin, - const size_t<3> ®ion, - ::size_t row_pitch, - ::size_t slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *)origin, - (const ::size_t *)region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImage( - const Image &src, - const Image &dst, - const size_t<3> &src_origin, - const size_t<3> &dst_origin, - const size_t<3> ®ion, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, (const ::size_t *)region, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point color value if - * the image channel data type is not an unnormalized signed or - * unsigned data type. - */ - cl_int enqueueFillImage( - const Image &image, - cl_float4 fillColor, - const size_t<3> &origin, - const size_t<3> ®ion, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *)origin, - (const ::size_t *)region, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA signed integer color value if - * the image channel data type is an unnormalized signed integer - * type. - */ - cl_int enqueueFillImage( - const Image &image, - cl_int4 fillColor, - const size_t<3> &origin, - const size_t<3> ®ion, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *)origin, - (const ::size_t *)region, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA unsigned integer color value if - * the image channel data type is an unnormalized unsigned integer - * type. - */ - cl_int enqueueFillImage( - const Image &image, - cl_uint4 fillColor, - const size_t<3> &origin, - const size_t<3> ®ion, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *)origin, - (const ::size_t *)region, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueCopyImageToBuffer( - const Image &src, - const Buffer &dst, - const size_t<3> &src_origin, - const size_t<3> ®ion, - ::size_t dst_offset, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *)src_origin, - (const ::size_t *)region, dst_offset, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferToImage( - const Buffer &src, - const Image &dst, - ::size_t src_offset, - const size_t<3> &dst_origin, - const size_t<3> ®ion, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *)dst_origin, (const ::size_t *)region, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - void *enqueueMapBuffer( - const Buffer &buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS *events = NULL, - Event *event = NULL, - cl_int *err = NULL) const - { - cl_event tmp; - cl_int error; - void *result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - - return result; - } - - void *enqueueMapImage( - const Image &buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3> &origin, - const size_t<3> ®ion, - ::size_t *row_pitch, - ::size_t *slice_pitch, - const VECTOR_CLASS *events = NULL, - Event *event = NULL, - cl_int *err = NULL) const - { - cl_event tmp; - cl_int error; - void *result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *)origin, (const ::size_t *)region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) - { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory &memory, - void *mapped_ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. - */ - cl_int enqueueMarkerWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * A synchronization point that enqueues a barrier operation. - * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command - * returns an event which can be waited on, i.e. this event can be waited on to insure that - * all events either in the event_wait_list or all previously enqueued commands, queued - * before this command to command_queue, have completed. - */ - cl_int enqueueBarrierWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueBarrierWithWaitList( - object_, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_BARRIER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to indicate with which device a set of memory objects - * should be associated. - */ - cl_int enqueueMigrateMemObjects( - const VECTOR_CLASS &memObjects, - cl_mem_migration_flags flags, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) - { - cl_event tmp; - - cl_mem *localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); - for (int i = 0; i < (int)memObjects.size(); ++i) - { - localMemObjects[i] = memObjects[i](); - } - - cl_int err = detail::errHandler( - ::clEnqueueMigrateMemObjects( - object_, - (cl_uint)memObjects.size(), - static_cast(localMemObjects), - flags, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueNDRangeKernel( - const Kernel &kernel, - const NDRange &offset, - const NDRange &global, - const NDRange &local = NullRange, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint)global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t *)offset : NULL, - (const ::size_t *)global, - local.dimensions() != 0 ? (const ::size_t *)local : NULL, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NDRANGE_KERNEL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueTask( - const Kernel &kernel, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_TASK_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueNativeKernel( - void(CL_CALLBACK *userFptr)(void *), - std::pair args, - const VECTOR_CLASS *mem_objects = NULL, - const VECTOR_CLASS *mem_locs = NULL, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_mem *mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem *)alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) - { - for (unsigned int i = 0; i < mem_objects->size(); i++) - { - mems[i] = ((*mem_objects)[i])(); - } - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint)mem_objects->size() : 0, - mems, - (mem_locs != NULL && mem_locs->size() > 0) ? (const void **)&mem_locs->front() : NULL, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NATIVE_KERNEL); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event *event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarker( - object_, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const VECTOR_CLASS &events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint)events.size(), - events.size() > 0 ? (const cl_event *)&events.front() : NULL), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS *mem_objects = NULL, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint)mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *)&mem_objects->front() : NULL, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS *mem_objects = NULL, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint)mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *)&mem_objects->front() : NULL, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(USE_DX_INTEROP) - typedef CL_API_ENTRY cl_int(CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - typedef CL_API_ENTRY cl_int(CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS *mem_objects = NULL, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); -#endif - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint)mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *)&mem_objects->front() : NULL, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS *mem_objects = NULL, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_2) -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_1) - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint)mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *)&mem_objects->front() : NULL, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -#ifdef _WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) std::atomic CommandQueue::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) CommandQueue CommandQueue::default_; -__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#else // !_WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) std::atomic CommandQueue::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) CommandQueue CommandQueue::default_; -__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#endif // !_WIN32 - -template -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int *err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) - { - flags |= CL_MEM_READ_ONLY; - } - else - { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) - { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType) * (endIterator - startIterator); - - if (useHostPtr) - { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } - else - { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - - if (!useHostPtr) - { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } -} - -template -Buffer::Buffer( - const CommandQueue &queue, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int *err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) - { - flags |= CL_MEM_READ_ONLY; - } - else - { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) - { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType) * (endIterator - startIterator); - - Context context = queue.getInfo(); - - if (useHostPtr) - { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } - else - { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - - if (!useHostPtr) - { - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - } -} - -inline cl_int enqueueReadBuffer( - const Buffer &buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline cl_int enqueueWriteBuffer( - const Buffer &buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void *enqueueMapBuffer( - const Buffer &buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS *events = NULL, - Event *event = NULL, - cl_int *err = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - - void *result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (cl_event *)event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) - { - *err = error; - } - return result; -} - -inline cl_int enqueueUnmapMemObject( - const Memory &memory, - void *mapped_ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) - { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event *)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -inline cl_int enqueueCopyBuffer( - const Buffer &src, - const Buffer &dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template -inline cl_int copy(IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. - */ -template -inline cl_int copy(const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. - */ -template -inline cl_int copy(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator - startIterator; - ::size_t byteLength = length * sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if (error != CL_SUCCESS) - { - return error; - } -#if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); -#else - std::copy(startIterator, endIterator, pointer); -#endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if (error != CL_SUCCESS) - { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. - */ -template -inline cl_int copy(const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator - startIterator; - ::size_t byteLength = length * sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if (error != CL_SUCCESS) - { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if (error != CL_SUCCESS) - { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -#if defined(CL_VERSION_1_1) -inline cl_int enqueueReadBufferRect( - const Buffer &buffer, - cl_bool blocking, - const size_t<3> &buffer_offset, - const size_t<3> &host_offset, - const size_t<3> ®ion, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer &buffer, - cl_bool blocking, - const size_t<3> &buffer_offset, - const size_t<3> &host_offset, - const size_t<3> ®ion, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer &src, - const Buffer &dst, - const size_t<3> &src_origin, - const size_t<3> &dst_origin, - const size_t<3> ®ion, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} -#endif - -inline cl_int enqueueReadImage( - const Image &image, - cl_bool blocking, - const size_t<3> &origin, - const size_t<3> ®ion, - ::size_t row_pitch, - ::size_t slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image &image, - cl_bool blocking, - const size_t<3> &origin, - const size_t<3> ®ion, - ::size_t row_pitch, - ::size_t slice_pitch, - void *ptr, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyImage( - const Image &src, - const Image &dst, - const size_t<3> &src_origin, - const size_t<3> &dst_origin, - const size_t<3> ®ion, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image &src, - const Buffer &dst, - const size_t<3> &src_origin, - const size_t<3> ®ion, - ::size_t dst_offset, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer &src, - const Image &dst, - ::size_t src_offset, - const size_t<3> &dst_origin, - const size_t<3> ®ion, - const VECTOR_CLASS *events = NULL, - Event *event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) - { - return error; - } - - return queue.finish(); -} - -// Kernel Functor support -// New interface as of September 2011 -// Requires the C++11 std::tr1::function (note do not support TR1) -// Visual Studio 2010 and GCC 4.2 - -struct EnqueueArgs -{ - CommandQueue queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - VECTOR_CLASS events_; - - EnqueueArgs(NDRange global) : queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - } - - EnqueueArgs(NDRange global, NDRange local) : queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - } - - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - } - - EnqueueArgs(Event e, NDRange global) : queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange global, NDRange local) : queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - } - - EnqueueArgs(CommandQueue &queue, NDRange global) : queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - } - - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - } - - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : queue_(queue), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - } -}; - -namespace detail -{ - -class NullType -{ -}; - -template -struct SetArg -{ - static void set(Kernel kernel, T0 arg) - { - kernel.setArg(index, arg); - } -}; - -template -struct SetArg -{ - static void set(Kernel, NullType) - { - } -}; - -template < - typename T0, typename T1, typename T2, typename T3, - typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, - typename T12, typename T13, typename T14, typename T15, - typename T16, typename T17, typename T18, typename T19, - typename T20, typename T21, typename T22, typename T23, - typename T24, typename T25, typename T26, typename T27, - typename T28, typename T29, typename T30, typename T31> -class KernelFunctorGlobal -{ - private: - Kernel kernel_; - - public: - KernelFunctorGlobal( - Kernel kernel) : kernel_(kernel) - { - } - - KernelFunctorGlobal( - const Program &program, - const STRING_CLASS name, - cl_int *err = NULL) : kernel_(program, name.c_str(), err) - { - } - - Event operator()( - const EnqueueArgs &args, - T0 t0, - T1 t1 = NullType(), - T2 t2 = NullType(), - T3 t3 = NullType(), - T4 t4 = NullType(), - T5 t5 = NullType(), - T6 t6 = NullType(), - T7 t7 = NullType(), - T8 t8 = NullType(), - T9 t9 = NullType(), - T10 t10 = NullType(), - T11 t11 = NullType(), - T12 t12 = NullType(), - T13 t13 = NullType(), - T14 t14 = NullType(), - T15 t15 = NullType(), - T16 t16 = NullType(), - T17 t17 = NullType(), - T18 t18 = NullType(), - T19 t19 = NullType(), - T20 t20 = NullType(), - T21 t21 = NullType(), - T22 t22 = NullType(), - T23 t23 = NullType(), - T24 t24 = NullType(), - T25 t25 = NullType(), - T26 t26 = NullType(), - T27 t27 = NullType(), - T28 t28 = NullType(), - T29 t29 = NullType(), - T30 t30 = NullType(), - T31 t31 = NullType()) - { - Event event; - SetArg<0, T0>::set(kernel_, t0); - SetArg<1, T1>::set(kernel_, t1); - SetArg<2, T2>::set(kernel_, t2); - SetArg<3, T3>::set(kernel_, t3); - SetArg<4, T4>::set(kernel_, t4); - SetArg<5, T5>::set(kernel_, t5); - SetArg<6, T6>::set(kernel_, t6); - SetArg<7, T7>::set(kernel_, t7); - SetArg<8, T8>::set(kernel_, t8); - SetArg<9, T9>::set(kernel_, t9); - SetArg<10, T10>::set(kernel_, t10); - SetArg<11, T11>::set(kernel_, t11); - SetArg<12, T12>::set(kernel_, t12); - SetArg<13, T13>::set(kernel_, t13); - SetArg<14, T14>::set(kernel_, t14); - SetArg<15, T15>::set(kernel_, t15); - SetArg<16, T16>::set(kernel_, t16); - SetArg<17, T17>::set(kernel_, t17); - SetArg<18, T18>::set(kernel_, t18); - SetArg<19, T19>::set(kernel_, t19); - SetArg<20, T20>::set(kernel_, t20); - SetArg<21, T21>::set(kernel_, t21); - SetArg<22, T22>::set(kernel_, t22); - SetArg<23, T23>::set(kernel_, t23); - SetArg<24, T24>::set(kernel_, t24); - SetArg<25, T25>::set(kernel_, t25); - SetArg<26, T26>::set(kernel_, t26); - SetArg<27, T27>::set(kernel_, t27); - SetArg<28, T28>::set(kernel_, t28); - SetArg<29, T29>::set(kernel_, t29); - SetArg<30, T30>::set(kernel_, t30); - SetArg<31, T31>::set(kernel_, t31); - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } -}; - -//------------------------------------------------------------------------------------------------------ - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30, - typename T31> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30, - T31 arg31) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30, - arg31); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5, - T6); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4, - T5); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3, - typename T4> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3, - T4); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4); - } -}; - -template < - typename T0, - typename T1, - typename T2, - typename T3> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2, - T3); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3); - } -}; - -template < - typename T0, - typename T1, - typename T2> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1, - T2); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2); - } -}; - -template < - typename T0, - typename T1> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0, - T1); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0, - T1 arg1) - { - return functor_( - enqueueArgs, - arg0, - arg1); - } -}; - -template < - typename T0> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> - FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : functor_(functor) - { - -#if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); -#endif - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs &, - T0); - - Event operator()( - const EnqueueArgs &enqueueArgs, - T0 arg0) - { - return functor_( - enqueueArgs, - arg0); - } -}; - -} // namespace detail - -//---------------------------------------------------------------------------------------------- - -template < - typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, - typename T3 = detail::NullType, typename T4 = detail::NullType, - typename T5 = detail::NullType, typename T6 = detail::NullType, - typename T7 = detail::NullType, typename T8 = detail::NullType, - typename T9 = detail::NullType, typename T10 = detail::NullType, - typename T11 = detail::NullType, typename T12 = detail::NullType, - typename T13 = detail::NullType, typename T14 = detail::NullType, - typename T15 = detail::NullType, typename T16 = detail::NullType, - typename T17 = detail::NullType, typename T18 = detail::NullType, - typename T19 = detail::NullType, typename T20 = detail::NullType, - typename T21 = detail::NullType, typename T22 = detail::NullType, - typename T23 = detail::NullType, typename T24 = detail::NullType, - typename T25 = detail::NullType, typename T26 = detail::NullType, - typename T27 = detail::NullType, typename T28 = detail::NullType, - typename T29 = detail::NullType, typename T30 = detail::NullType, - typename T31 = detail::NullType> -struct make_kernel : public detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31> -{ - public: - typedef detail::KernelFunctorGlobal< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31> - FunctorType; - - make_kernel( - const Program &program, - const STRING_CLASS name, - cl_int *err = NULL) : detail::functionImplementation_( - FunctorType(program, name, err)) - { - } - - make_kernel( - const Kernel kernel) : detail::functionImplementation_( - FunctorType(kernel)) - { - } -}; - -//---------------------------------------------------------------------------------------------------------------------- - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_ARG_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_ERR -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR -#undef __SET_PRINTF_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __CL_EXPLICIT_CONSTRUCTORS - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __CL_FUNCTION_TYPE - -// Extensions -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_VERSION_1_1) -#undef __INIT_CL_EXT_FCN_PTR -#endif // #if defined(CL_VERSION_1_1) -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -#undef __DEFAULT_NOT_INITIALIZED -#undef __DEFAULT_BEING_INITIALIZED -#undef __DEFAULT_INITIALIZED - -#undef CL_HPP_RVALUE_REFERENCES_SUPPORTED -#undef CL_HPP_NOEXCEPT - -} // namespace cl - -#endif // CL_HPP_ diff --git a/src/crypto/argon2gpu/opencl/device.cpp b/src/crypto/argon2gpu/opencl/device.cpp deleted file mode 100644 index 70a80c0021..0000000000 --- a/src/crypto/argon2gpu/opencl/device.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/opencl/device.h" - -#include -#include -#include - -namespace argon2gpu -{ -namespace opencl -{ -std::string Device::getName() const -{ - return "OpenCL Device '" + device.getInfo() + "' (" + device.getInfo() + ")"; -} - -std::size_t Device::getTotalMemory() const -{ - return device.getInfo(); -} - -template -static std::ostream& printBitfield(std::ostream& out, T value, const std::vector >& lookup) -{ - bool first = true; - for (auto& entry : lookup) { - if (value & entry.first) { - if (!first) { - out << " | "; - } - first = false; - out << entry.second; - } - } - return out; -} - -template -static std::ostream& printEnum(std::ostream& out, T value, const std::unordered_map& lookup) -{ - try { - return out << lookup.at(value); - } catch (const std::out_of_range&) { - return out << ""; - } -} - -template -std::ostream& operator<<(std::ostream& out, const std::vector& vec) -{ - out << "["; - bool first = true; - for (T value : vec) { - if (!first) { - out << ", "; - } - first = false; - out << value; - } - return out << "]"; -} - -std::string Device::getInfo() const -{ - std::ostringstream out; - out << "OpenCL Device '" << device.getInfo() << "':" << std::endl; - out << " Type: "; - printBitfield(out, device.getInfo(), { - {CL_DEVICE_TYPE_CPU, "CPU"}, - {CL_DEVICE_TYPE_GPU, "GPU"}, - {CL_DEVICE_TYPE_ACCELERATOR, "Accelerator"}, - {CL_DEVICE_TYPE_DEFAULT, "Default"}, - }) - << std::endl; - out << " Available: " - << device.getInfo() << std::endl; - out << " Compiler available: " - << device.getInfo() << std::endl; - out << std::endl; - - out << " Version: " - << device.getInfo() << std::endl; - out << " OpenCL C Version: " - << device.getInfo() << std::endl; - out << " Extensions: " - << device.getInfo() << std::endl; - out << std::endl; - - out << " Vendor: " - << device.getInfo() << std::endl; - out << " Vendor ID: " - << device.getInfo() << std::endl; - out << std::endl; - - cl::Platform platform(device.getInfo()); - out << " Platform name: " - << platform.getInfo() << std::endl; - out << " Platform vendor: " - << platform.getInfo() << std::endl; - out << " Platform version: " - << platform.getInfo() << std::endl; - out << " Platform extensions: " - << platform.getInfo() << std::endl; - out << std::endl; - - out << " Driver version: " - << device.getInfo() << std::endl; - out << " Little-endian: " - << device.getInfo() << std::endl; - out << std::endl; - - out << " Max compute units: " - << device.getInfo() << std::endl; - out << " Max work-item dimensions: " - << device.getInfo() << std::endl; - out << " Max work-item sizes: " - << device.getInfo() << std::endl; - out << std::endl; - - out << " Max clock frequency: " - << device.getInfo() << " MHz" << std::endl; - out << std::endl; - - out << " Address bits: " - << device.getInfo() << std::endl; - out << " Max memory allocation size: " - << device.getInfo() << " bytes" << std::endl; - out << " Max parameter size: " - << device.getInfo() << " bytes" << std::endl; - out << " Memory base address alignment: " - << device.getInfo() << " bits" << std::endl; - out << " Min data type alignment: " - << device.getInfo() << " bytes" << std::endl; - out << std::endl; - - out << " Unified memory: " - << device.getInfo() << std::endl; - out << " Global memory cache type: "; - printEnum(out, device.getInfo(), {{CL_NONE, "None"}, {CL_READ_ONLY_CACHE, "Read-only"}, {CL_READ_WRITE_CACHE, "Read-write"}}) << std::endl; - out << " Global memory cacheline size: " - << device.getInfo() << " bytes" << std::endl; - out << " Global memory cache size: " - << device.getInfo() << " bytes" << std::endl; - out << " Global memory size: " - << device.getInfo() << " bytes" << std::endl; - out << std::endl; - - out << " Max constant buffer size: " - << device.getInfo() << " bytes" << std::endl; - out << " Max constant arguments: " - << device.getInfo() << std::endl; - out << std::endl; - - out << " Local memory type: "; - printEnum(out, device.getInfo(), { - {CL_LOCAL, "Dedicated"}, - {CL_GLOBAL, "Global"}, - }) - << std::endl; - out << " Local memory size: " - << device.getInfo() << " bytes" << std::endl; - out << std::endl; - - out << " Preferred vector width (char): " - << device.getInfo() - << " (native: " - << device.getInfo() - << ")" << std::endl; - out << " Preferred vector width (short): " - << device.getInfo() - << " (native: " - << device.getInfo() - << ")" << std::endl; - out << " Preferred vector width (int): " - << device.getInfo() - << " (native: " - << device.getInfo() - << ")" << std::endl; - out << " Preferred vector width (long): " - << device.getInfo() - << " (native: " - << device.getInfo() - << ")" << std::endl; - out << " Preferred vector width (float): " - << device.getInfo() - << " (native: " - << device.getInfo() - << ")" << std::endl; - out << " Preferred vector width (double): " - << device.getInfo() - << " (native: " - << device.getInfo() - << ")" << std::endl; - out << " Preferred vector width (half): " - << device.getInfo() - << " (native: " - << device.getInfo() - << ")" << std::endl; - out << std::endl; - - out << " Error correction supported: " - << device.getInfo() << std::endl; - out << " Profiling timer resolution: " - << device.getInfo() << " ns" << std::endl; - out << std::endl; - - out << " Execution capabilites: "; - printBitfield(out, device.getInfo(), { - {CL_EXEC_KERNEL, "OpenCL kernels"}, - {CL_EXEC_NATIVE_KERNEL, "Native kernels"}, - }) - << std::endl; - out << " Command queue properties: "; - printBitfield(out, device.getInfo(), { - {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "Out-of-order execution"}, - {CL_QUEUE_PROFILING_ENABLE, "Profiling"}, - }) - << std::endl; - return out.str(); -} - -} // namespace opencl -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/opencl/device.h b/src/crypto/argon2gpu/opencl/device.h deleted file mode 100644 index b7f398e775..0000000000 --- a/src/crypto/argon2gpu/opencl/device.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2017-2021 Łukasz Kurowski - * Copyright (C) 2015 Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_OPENCL_DEVICE_H -#define ARGON2_OPENCL_DEVICE_H - -#include "crypto/argon2gpu/opencl/opencl.h" - -namespace argon2gpu -{ -namespace opencl -{ -class Device -{ -private: - cl::Device device; - -public: - std::string getName() const; - std::string getInfo() const; - std::size_t getTotalMemory() const; - - const cl::Device& getCLDevice() const { return device; } - - /** - * @brief Empty constructor. - * NOTE: Calling methods other than the destructor on an instance initialized - * with empty constructor results in undefined behavior. - */ - Device() {} - - Device(const cl::Device& device) - : device(device) - { - } - - Device(const Device&) = default; - Device(Device&&) = default; - - Device& operator=(const Device&) = default; -}; - -} // namespace opencl -} // namespace argon2gpu - -#endif // ARGON2_OPENCL_DEVICE_H diff --git a/src/crypto/argon2gpu/opencl/global-context.cpp b/src/crypto/argon2gpu/opencl/global-context.cpp deleted file mode 100644 index f5adf1c40b..0000000000 --- a/src/crypto/argon2gpu/opencl/global-context.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/opencl/global-context.h" - -#include - -namespace argon2gpu -{ -namespace opencl -{ -GlobalContext::GlobalContext() - : devices() -{ - std::vector platforms; - cl::Platform::get(&platforms); - - std::vector clDevices; - for (cl::Platform platform : platforms) { - try { - platform.getDevices(CL_DEVICE_TYPE_GPU, &clDevices); - devices.insert(devices.end(), clDevices.begin(), clDevices.end()); - } catch (const cl::Error& err) { - std::cerr << "WARNING: Unable to get devices for platform '" - << platform.getInfo() - << "' - error " << err.err() << std::endl; - } - } -} - -} // namespace opencl -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/opencl/global-context.h b/src/crypto/argon2gpu/opencl/global-context.h deleted file mode 100644 index 898ca9396a..0000000000 --- a/src/crypto/argon2gpu/opencl/global-context.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_OPENCL_GLOBALCONTEXT_H -#define ARGON2_OPENCL_GLOBALCONTEXT_H - -#include "crypto/argon2gpu/opencl/device.h" - -#include -#include - -namespace argon2gpu -{ -namespace opencl -{ -class GlobalContext -{ -private: - std::vector devices; - -public: - const std::vector& getAllDevices() const { return devices; } - - GlobalContext(); -}; - -} // namespace opencl -} // namespace argon2gpu - -#endif // ARGON2_OPENCL_GLOBALCONTEXT_H diff --git a/src/crypto/argon2gpu/opencl/kernel-loader.cpp b/src/crypto/argon2gpu/opencl/kernel-loader.cpp deleted file mode 100644 index a69aa825f0..0000000000 --- a/src/crypto/argon2gpu/opencl/kernel-loader.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/opencl/kernel-loader.h" -#include "crypto/argon2gpu/opencl/kernel.cl.h" - -#include -#include -#include - -namespace argon2gpu -{ -namespace opencl -{ -cl::Program KernelLoader::loadArgon2Program( - const cl::Context& context, - Type type, - Version version, - bool debug) -{ - std::stringstream buildOpts; - if (debug) { - buildOpts << "-g "; - } - buildOpts << "-DARGON2_TYPE=" << type << " "; - buildOpts << "-DARGON2_VERSION=" << version << " "; - - cl::Program prog(context, reinterpret_cast(code::kernel)); - try { - std::string opts = buildOpts.str(); - prog.build(opts.c_str()); - } catch (const cl::Error& err) { - std::cerr << "ERROR: Failed to build program:" << std::endl; - for (cl::Device& device : context.getInfo()) { - std::cerr << " Build log from device '" << device.getInfo() << "':" << std::endl; - std::cerr << prog.getBuildInfo(device); - } - throw; - } - return prog; -} - -} // namespace opencl -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/opencl/kernel-loader.h b/src/crypto/argon2gpu/opencl/kernel-loader.h deleted file mode 100644 index 8d4f30158f..0000000000 --- a/src/crypto/argon2gpu/opencl/kernel-loader.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_OPENCL_KERNELLOADER_H -#define ARGON2_OPENCL_KERNELLOADER_H - -#include "crypto/argon2gpu/common.h" -#include "crypto/argon2gpu/opencl/opencl.h" - -#include - -namespace argon2gpu -{ -namespace opencl -{ -namespace KernelLoader -{ -cl::Program loadArgon2Program( - const cl::Context& context, - Type type, - Version version, - bool debug = false); -}; - -} // namespace opencl -} // namespace argon2gpu - -#endif // ARGON2_OPENCL_KERNELLOADER_H diff --git a/src/crypto/argon2gpu/opencl/kernel-runner.cpp b/src/crypto/argon2gpu/opencl/kernel-runner.cpp deleted file mode 100644 index b599a6ce6d..0000000000 --- a/src/crypto/argon2gpu/opencl/kernel-runner.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (C) 2015-2021 Ehsan Dalvand , Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/opencl/kernel-runner.h" - -#include - -#define THREADS_PER_LANE 32 - -namespace argon2gpu -{ -namespace opencl -{ - -KernelRunner::KernelRunner(const ProgramContext* programContext, - const Argon2Params* params, - const Device* device, - std::uint32_t batchSize, - bool bySegment, - bool precompute) - : programContext(programContext), params(params), batchSize(batchSize), - bySegment(bySegment), precompute(precompute), - memorySize(params->getMemorySize() * static_cast(batchSize)) -{ - auto context = programContext->getContext(); - std::uint32_t passes = params->getTimeCost(); - std::uint32_t lanes = params->getLanes(); - std::uint32_t segmentBlocks = params->getSegmentBlocks(); - - queue = cl::CommandQueue(context, device->getCLDevice()); - - memoryBuffer = cl::Buffer(context, CL_MEM_READ_WRITE, memorySize); - - kernel = cl::Kernel(programContext->getProgram(), "argon2d_fill"); - kernel.setArg(1, memoryBuffer); - kernel.setArg(2, passes); - kernel.setArg(3, lanes); - kernel.setArg(4, segmentBlocks); - - inputBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, 80); - resultBuffer = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(uint32_t)); - - kernelInit = cl::Kernel(programContext->getProgram(), "argon2d_initialize"); - kernelInit.setArg(0, memoryBuffer); - kernelInit.setArg(1, inputBuffer); - - kernelFinal = cl::Kernel(programContext->getProgram(), "argon2d_finalize"); - kernelFinal.setArg(0, memoryBuffer); - kernelFinal.setArg(1, resultBuffer); -} - -void KernelRunner::run(std::uint32_t lanesPerBlock, std::uint32_t jobsPerBlock) -{ - std::uint32_t lanes = params->getLanes(); - std::size_t shmemSize = THREADS_PER_LANE * lanesPerBlock * jobsPerBlock * sizeof(cl_uint) * 2; - - cl::NDRange globalRange{THREADS_PER_LANE * lanes, batchSize}; - cl::NDRange localRange{THREADS_PER_LANE * lanesPerBlock, jobsPerBlock}; - - kernel.setArg(0, {shmemSize}); - - queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalRange, localRange); -} - -void KernelRunner::init(const void* input){ - std::uint32_t umax = std::numeric_limits::max(); - queue.enqueueWriteBuffer(inputBuffer, true, 0, 80, input); - queue.enqueueWriteBuffer(resultBuffer, true, 0, sizeof(cl_uint), static_cast(&umax)); -} - -void KernelRunner::fillFirstBlocks(const std::uint32_t startNonce) -{ - std::uint32_t lanes = params->getLanes(); - std::uint32_t jobsPerBlock = (batchSize<16) ? 1 : 16; - cl::NDRange global{ lanes*2, batchSize }; - cl::NDRange local{ lanes*2, jobsPerBlock }; - - kernelInit.setArg(2, startNonce); - - queue.enqueueNDRangeKernel(kernelInit, cl::NullRange, global, local); - -} - -void KernelRunner::finalize(const std::uint32_t startNonce, const std::uint64_t target) -{ - std::uint32_t jobsPerBlock = (batchSize<8) ? 1 : 8; - std::size_t smem = 129 * sizeof(cl_ulong) * jobsPerBlock + 18 * sizeof(cl_ulong) * jobsPerBlock; - - cl::NDRange global{ 4, batchSize }; - cl::NDRange local{ 4, jobsPerBlock }; - - kernelFinal.setArg(2, cl::__local(smem)); - kernelFinal.setArg(3, startNonce); - kernelFinal.setArg(4, target); - - queue.enqueueNDRangeKernel(kernelFinal, cl::NullRange, global, local); - -} - - -std::uint32_t KernelRunner::readResultNonce() -{ - queue.enqueueReadBuffer(resultBuffer, true, 0, sizeof(cl_uint), static_cast(&res_nonce)); - return res_nonce; -} - -} // namespace opencl -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/opencl/kernel-runner.h b/src/crypto/argon2gpu/opencl/kernel-runner.h deleted file mode 100644 index 02198cbccc..0000000000 --- a/src/crypto/argon2gpu/opencl/kernel-runner.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2015-2021 Ehsan Dalvand , Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_OPENCL_KERNELRUNNER_H -#define ARGON2_OPENCL_KERNELRUNNER_H - -#include "crypto/argon2gpu/common.h" -#include "crypto/argon2gpu/opencl/program-context.h" - -#if defined(MAC_OSX) -#pragma clang diagnostic ignored "-Wunused-private-field" -#endif - -namespace argon2gpu -{ -namespace opencl -{ -class KernelRunner -{ -private: - const ProgramContext* programContext; - const Argon2Params* params; - - std::uint32_t batchSize; - bool bySegment; - bool precompute; - - cl::CommandQueue queue; - cl::Kernel kernel; - cl::Buffer memoryBuffer, refsBuffer; - cl::Event start, end; - - cl::Buffer inputBuffer; - cl::Buffer resultBuffer; - cl::Kernel kernelInit; - cl::Kernel kernelFinal; - - std::size_t memorySize; - std::uint32_t res_nonce; - -public: - std::uint32_t getMinLanesPerBlock() const - { - return bySegment ? 1 : params->getLanes(); - } - std::uint32_t getMaxLanesPerBlock() const { return params->getLanes(); } - - std::uint32_t getMinJobsPerBlock() const { return 1; } - std::uint32_t getMaxJobsPerBlock() const { return batchSize; } - - std::uint32_t getBatchSize() const { return batchSize; } - - KernelRunner(const ProgramContext* programContext, - const Argon2Params* params, - const Device* device, - std::uint32_t batchSize, - bool bySegment, - bool precompute); - - - void run(std::uint32_t lanesPerBlock, std::uint32_t jobsPerBlock); - void init(const void* input); - void fillFirstBlocks(const std::uint32_t startNonce); - void finalize(const std::uint32_t startNonce, const std::uint64_t target); - std::uint32_t readResultNonce(); -}; - -} // namespace opencl -} // namespace argon2gpu - -#endif // ARGON2_OPENCL_KERNELRUNNER_H diff --git a/src/crypto/argon2gpu/opencl/kernel.cl b/src/crypto/argon2gpu/opencl/kernel.cl deleted file mode 100644 index d57912c9a6..0000000000 --- a/src/crypto/argon2gpu/opencl/kernel.cl +++ /dev/null @@ -1,1766 +0,0 @@ -/* - * Copyright (C) 2018-2021 Ehsan Dalvand , Alireza Jahandideh - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#define ARGON2_BLOCK_SIZE 1024 -#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8) -#define BLAKE2B_BLOCKBYTES 128 -#define BLAKE2B_OUTBYTES 64 -#define ARGON2_PREHASH_DIGEST_LENGTH 64 -#define ARGON2_SYNC_POINTS 4 -#define THREADS_PER_LANE 32 -#define QWORDS_PER_THREAD (ARGON2_QWORDS_IN_BLOCK / 32) -#define ALGO_VERSION 0x10 - -enum algo_params { - ALGO_LANES = 8, - ALGO_MCOST = 500, - ALGO_PASSES = 2, - ALGO_OUTLEN = 32, - ALGO_TOTAL_BLOCKS = (ALGO_MCOST / (4 * ALGO_LANES)) * 4 * ALGO_LANES, - ALGO_LANE_LENGHT = ALGO_TOTAL_BLOCKS / ALGO_LANES, - ALGO_SEGMENT_BLOCKS = ALGO_LANE_LENGHT / 4 -}; - -typedef unsigned int uint32_t; -typedef unsigned long uint64_t; -typedef unsigned char uint8_t; - -struct block { - uint64_t v[ARGON2_QWORDS_IN_BLOCK]; -}; -struct partialState { - uint64_t a, b; -}; -struct uint64x8{ - uint64_t s0,s1,s2,s3,s4,s5,s6,s7; -}; - -#define SWAP4(x) as_uint(as_uchar4(x).wzyx) - -__constant uint64_t blake2b_Init[8] = -{ - 0x6A09E667F2BDC948,0xBB67AE8584CAA73B, - 0x3C6EF372FE94F82B,0xA54FF53A5F1D36F1, - 0x510E527FADE682D1,0x9B05688C2B3E6C1F, - 0x1F83D9ABFB41BD6B,0x5BE0CD19137E2179 -}; -__constant uint64_t blake2b_IV[8] = -{ - 7640891576956012808,13503953896175478587, - 4354685564936845355,11912009170470909681, - 5840696475078001361,11170449401992604703, - 2270897969802886507,6620516959819538809 -}; -__constant uint64_t sigma[12][2]= -{ - {506097522914230528,1084818905618843912}, - {436021270388410894,217587900856929281}, - {940973067642603531,290764780619369994}, - {1011915791265892615,580682894302053890}, - {1083683067090239497,937601969488068878}, - {218436676723543042,648815278989708548}, - {721716194318550284,794887571959580416}, - {649363922558061325,721145521830297605}, - {576464098234863366,363107122416517644}, - {360576072368521738,3672381957147407}, - {506097522914230528,1084818905618843912}, - {436021270388410894,217587900856929281}, -}; - -__constant uint64_t blake2b_Init_928[8] = -{ - 0x6A09E667F2BDC928,0xBB67AE8584CAA73B, - 0x3C6EF372FE94F82B,0xA54FF53A5F1D36F1, - 0x510E527FADE682D1,0x9B05688C2B3E6C1F, - 0x1F83D9ABFB41BD6B,0x5BE0CD19137E2179 -}; -#define initState(a) {\ - a[0]=0x6A09E667F2BDC948;\ - a[1]=0xBB67AE8584CAA73B;\ - a[2]=0x3C6EF372FE94F82B;\ - a[3]=0xA54FF53A5F1D36F1;\ - a[4]=0x510E527FADE682D1;\ - a[5]=0x9B05688C2B3E6C1F;\ - a[6]=0x1F83D9ABFB41BD6B;\ - a[7]=0x5BE0CD19137E2179;\ -} - -#define G(a,b,c,d,x,col) { \ - ref1=sigma[r][col]>>16*x;\ - ref2=sigma[r][col]>>(16*x+8);\ - v[a] += v[b]+upsample(m[ref1*2+1], m[ref1*2]); \ - v[d] = rotate64(v[d] ^ v[a],32); \ - v[c] += v[d]; \ - v[b] = rotate64(v[b] ^ v[c], 24); \ - v[a] +=v[b]+upsample(m[ref2*2+1], m[ref2*2]); \ - v[d] = rotate64( v[d] ^ v[a], 16); \ - v[c] += v[d]; \ - v[b] = rotate64( v[b] ^ v[c], 63); \ -} - -#define G_loop(a,b,c,d,x,col) { \ - ref1=sigma[r][col]>>16*x; \ - ref2=sigma[r][col]>>(16*x+8); \ - ref1=ref1&7; \ - ref2=ref2&7; \ - v[a] += v[b]+state[ref1]; \ - v[d] = rotate64(v[d] ^ v[a],32); \ - v[c] += v[d]; \ - v[b] = rotate64(v[b] ^ v[c], 24); \ - v[a] +=v[b]+state[ref2]; \ - v[d] = rotate64( v[d] ^ v[a], 16); \ - v[c] += v[d]; \ - v[b] = rotate64( v[b] ^ v[c], 63); \ -} -ulong u64_build(uint hi, uint lo) -{ - return upsample(hi, lo); -} - -uint u64_lo(ulong x) -{ - return (uint)x; -} - -uint u64_hi(ulong x) -{ - return (uint)(x >> 32); -} -void zero_buffer( __local uint32_t* buffer, const uint32_t idx){ - buffer[idx] =0; - buffer[idx+4] =0; - buffer[idx+8] =0; - buffer[idx+12]=0; - buffer[idx+16] =0; - buffer[idx+20] =0; - buffer[idx+24] =0; - buffer[idx+28]=0; -} - - -void enc32( void *pp, const uint32_t x) -{ - uint8_t *p = ( uint8_t *)pp; - - p[3] = x & 0xff; - p[2] = (x >> 8) & 0xff; - p[1] = (x >> 16) & 0xff; - p[0] = (x >> 24) & 0xff; -} - -uint64_t rotate64( uint64_t x, const uint32_t n) -{ - return (x >> n) | (x << (64 - n)); -} -void blake2b_compress_loop_1w( uint64_t* restrict state,__global struct block* memCell) -{ - uint64_t v[16]; - uint64_t s[8]; - - s[0]=state[0]; - s[1]=state[1]; - s[2]=state[2]; - s[3]=state[3]; - s[4]=state[4]; - s[5]=state[5]; - s[6]=state[6]; - s[7]=state[7]; - - for (int i=1;i<31;i++){ - initState(v); - - v[8] = blake2b_IV[0]; - v[9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ 64; - v[13] = blake2b_IV[5]; - v[14] = blake2b_IV[6] ^(uint64_t) -1; - v[15] = blake2b_IV[7]; - - v[0] += v[4]+s[0]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]+s[1]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[2]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]+s[3]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]+s[4]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]+s[5]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]+s[6]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]+s[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - - v[1] += v[6]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - - v[2] += v[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[4]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]+s[6]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]+s[1]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]+s[0]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]+s[2]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]+s[7]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]+s[5]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]+s[3]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]+s[0]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]+s[5]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]+s[2]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]+s[3]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]+s[6]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - - v[2] += v[7]+s[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]+s[1]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]+s[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]+s[7]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[3]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]+s[1]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - - v[0] += v[5]+s[2]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]+s[6]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - - v[1] += v[6]+s[5]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - - v[2] += v[7]+s[4]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]+s[0]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - - v[3] += v[4]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]+s[0]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[5]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]+s[7]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]+s[2]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]+s[4]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]+s[1]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]+s[6]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]+s[3]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]+s[2]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[6]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]+s[0]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]+s[3]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]+s[4]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - - v[1] += v[6]+s[7]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]+s[5]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]+s[1]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]+s[5]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[1]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]+s[4]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]+s[0]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]+s[7]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]+s[6]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]+s[3]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]+s[2]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[7]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]+s[1]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]+s[3]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]+s[5]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]+s[0]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]+s[4]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]+s[6]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]+s[2]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]+s[6]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]+s[3]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]+s[0]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]+s[2]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]+s[7]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]+s[1]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]+s[4]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]+s[5]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]+s[2]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]+s[4]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]+s[7]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]+s[6]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]+s[1]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]+s[5]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]+s[3]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]+s[0]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]+s[0]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]+s[1]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[2]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]+s[3]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]+s[4]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]+s[5]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]+s[6]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]+s[7]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - - v[0] += v[4]; - v[12] = rotate64(v[12] ^ v[0],32); - v[8] += v[12]; - v[4] = rotate64(v[4] ^ v[8], 24); - v[0] +=v[4]; - v[12] = rotate64( v[12] ^ v[0], 16); - v[8] += v[12]; - v[4] = rotate64( v[4] ^ v[8], 63); - - - v[1] += v[5]+s[4]; - v[13] = rotate64(v[13] ^ v[1],32); - v[9] += v[13]; - v[5] = rotate64(v[5] ^ v[9], 24); - v[1] +=v[5]; - v[13] = rotate64( v[13] ^ v[1], 16); - v[9] += v[13]; - v[5] = rotate64( v[5] ^ v[9], 63); - - - v[2] += v[6]; - v[14] = rotate64(v[14] ^ v[2],32); - v[10] += v[14]; - v[6] = rotate64(v[6] ^ v[10], 24); - v[2] +=v[6]; - v[14] = rotate64( v[14] ^ v[2], 16); - v[10] += v[14]; - v[6] = rotate64( v[6] ^ v[10], 63); - - - v[3] += v[7]; - v[15] = rotate64(v[15] ^ v[3],32); - v[11] += v[15]; - v[7] = rotate64(v[7] ^ v[11], 24); - v[3] +=v[7]+s[6]; - v[15] = rotate64( v[15] ^ v[3], 16); - v[11] += v[15]; - v[7] = rotate64( v[7] ^ v[11], 63); - - - v[0] += v[5]+s[1]; - v[15] = rotate64(v[15] ^ v[0],32); - v[10] += v[15]; - v[5] = rotate64(v[5] ^ v[10], 24); - v[0] +=v[5]; - v[15] = rotate64( v[15] ^ v[0], 16); - v[10] += v[15]; - v[5] = rotate64( v[5] ^ v[10], 63); - - - v[1] += v[6]+s[0]; - v[12] = rotate64(v[12] ^ v[1],32); - v[11] += v[12]; - v[6] = rotate64(v[6] ^ v[11], 24); - v[1] +=v[6]+s[2]; - v[12] = rotate64( v[12] ^ v[1], 16); - v[11] += v[12]; - v[6] = rotate64( v[6] ^ v[11], 63); - - - v[2] += v[7]; - v[13] = rotate64(v[13] ^ v[2],32); - v[8] += v[13]; - v[7] = rotate64(v[7] ^ v[8], 24); - v[2] +=v[7]+s[7]; - v[13] = rotate64( v[13] ^ v[2], 16); - v[8] += v[13]; - v[7] = rotate64( v[7] ^ v[8], 63); - - - v[3] += v[4]+s[5]; - v[14] = rotate64(v[14] ^ v[3],32); - v[9] += v[14]; - v[4] = rotate64(v[4] ^ v[9], 24); - v[3] +=v[4]+s[3]; - v[14] = rotate64( v[14] ^ v[3], 16); - v[9] += v[14]; - v[4] = rotate64( v[4] ^ v[9], 63); - - s[0] = blake2b_Init[0] ^ v[0] ^ v[8]; - s[1] = blake2b_Init[1] ^ v[1] ^ v[9]; - s[2] = blake2b_Init[2] ^ v[2] ^ v[10]; - s[3] = blake2b_Init[3] ^ v[3] ^ v[11]; - s[4] = blake2b_Init[4] ^ v[4] ^ v[12]; - s[5] = blake2b_Init[5] ^ v[5] ^ v[13]; - s[6] = blake2b_Init[6] ^ v[6] ^ v[14]; - s[7] = blake2b_Init[7] ^ v[7] ^ v[15]; - - #pragma unroll - for (int j=0;j<4;j++) - memCell->v[j+i*4]=s[j]; - } - - for (int i=0;i<4;i++) - memCell->v[i+124]=s[i+4]; - -} - -void blake2b_compress_1w( - uint64_t* state, - const uint32_t* m, - const uint32_t step, - const bool lastChunk, - const size_t lastChunkSize) -{ - uint64_t v[16]; - - v[0] = state[0]; - v[1] = state[1]; - v[2] = state[2]; - v[3] = state[3]; - v[4] = state[4]; - v[5] = state[5]; - v[6] = state[6]; - v[7] = state[7]; - v[8] = blake2b_IV[0]; - v[9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - - if (lastChunk){ - v[12]= blake2b_IV[4] ^ (step-1)*BLAKE2B_BLOCKBYTES+lastChunkSize; - v[14]= blake2b_IV[6] ^(uint64_t) -1; - }else{ - v[12]= blake2b_IV[4] ^ step*BLAKE2B_BLOCKBYTES; - v[14]= blake2b_IV[6]; - } - v[13]= blake2b_IV[5]; - v[15]= blake2b_IV[7]; - - #pragma unroll - for(int r=0; r < 12; r++) - { - uint8_t ref1,ref2; - - G( 0, 4, 8, 12, 0, 0 ); - G( 1, 5, 9, 13, 1, 0 ); - G( 2, 6, 10, 14, 2, 0 ); - G( 3, 7, 11, 15, 3, 0 ); - G( 0, 5, 10, 15, 0, 1 ); - G( 1, 6, 11, 12, 1, 1 ); - G( 2, 7, 8, 13, 2, 1 ); - G( 3, 4, 9, 14, 3, 1 ); - } - state[0] ^= v[0] ^ v[8]; - state[1] ^= v[1] ^ v[9]; - state[2] ^= v[2] ^ v[10]; - state[3] ^= v[3] ^ v[11]; - state[4] ^= v[4] ^ v[12]; - state[5] ^= v[5] ^ v[13]; - state[6] ^= v[6] ^ v[14]; - state[7] ^= v[7] ^ v[15]; -} - -void computeInitialHash( - __global const uint32_t* input, - uint32_t* buffer, const uint32_t nonce) -{ - uint64_t state[8]; - initState(state); - for (int i=0;i<16;i++) - buffer[i]=0; - - buffer[0] = ALGO_LANES; - buffer[1] = ALGO_OUTLEN; - buffer[2] = ALGO_MCOST; - buffer[3] = ALGO_PASSES; - buffer[4] = ALGO_VERSION; - buffer[6] = 80; - - for (int i=0;i<19;i++) - buffer[7+i]=input[i]; - buffer[26] = nonce; - buffer[27] = 80; - for (int i=0;i<4;i++) - buffer[28+i]=input[i]; - blake2b_compress_1w( state, buffer, 1, false, 72); - for (int i=0;i<15;i++) - buffer[i]=input[i+4]; - buffer[15] = nonce; - for (int i=16;i<32;i++) - buffer[i]=0; - blake2b_compress_1w( state, buffer, 2, true, 72); - for (int i=0;i<16;i++) - buffer[i+1]=((uint32_t*)state)[i]; -} -void fillFirstBlock( - __global struct block* memory, - uint32_t* buffer) -{ - const uint32_t idx = get_local_id(0); - const uint32_t jobID = get_group_id(1)*get_local_size(1)+get_local_id(1); - uint32_t row = idx / ALGO_LANES; - uint32_t column = idx % ALGO_LANES; - __global struct block* memCell = memory + jobID * ALGO_TOTAL_BLOCKS + row * ALGO_LANES + column; - uint64_t state[8]; - initState(state); - buffer[0]=1024; - buffer[17] = row; - buffer[18] = column; - blake2b_compress_1w( state, buffer, 1, true, 76); - #pragma unroll - for (int j=0;j<4;j++) - memCell->v[j]=state[j]; -blake2b_compress_loop_1w(state,memCell); -} -__kernel void argon2d_initialize( - __global struct block* memory, - __global uint32_t* input, - const uint startNonce) -{ - const uint32_t idx = get_local_id(0); - uint32_t jobId = get_group_id(1)*get_local_size(1)+get_local_id(1); - const uint32_t nonce = jobId + startNonce; - uint32_t buffer[32]; - computeInitialHash( input, buffer, nonce); - fillFirstBlock(memory, buffer); -} - -struct u64_shuffle_buf { - uint lo[THREADS_PER_LANE]; - uint hi[THREADS_PER_LANE]; -}; -ulong u64_shuffle(ulong v, uint thread_src, uint thread, - __local struct u64_shuffle_buf *buf) -{ - uint lo = u64_lo(v); - uint hi = u64_hi(v); - buf->lo[thread] = lo; - buf->hi[thread] = hi; - barrier(CLK_LOCAL_MEM_FENCE); - lo = buf->lo[thread_src]; - hi = buf->hi[thread_src]; - return u64_build(hi, lo); -} -struct block_g { - ulong data[ARGON2_QWORDS_IN_BLOCK]; -}; -struct block_th { - ulong a, b, c, d; -}; -ulong cmpeq_mask(uint test, uint ref) -{ - uint x = -(uint)(test == ref); - return u64_build(x, x); -} -ulong block_th_get(const struct block_th *b, uint idx) -{ - ulong res = 0; - res ^= cmpeq_mask(idx, 0) & b->a; - res ^= cmpeq_mask(idx, 1) & b->b; - res ^= cmpeq_mask(idx, 2) & b->c; - res ^= cmpeq_mask(idx, 3) & b->d; - return res; -} -void block_th_set(struct block_th *b, uint idx, ulong v) -{ - b->a ^= cmpeq_mask(idx, 0) & (v ^ b->a); - b->b ^= cmpeq_mask(idx, 1) & (v ^ b->b); - b->c ^= cmpeq_mask(idx, 2) & (v ^ b->c); - b->d ^= cmpeq_mask(idx, 3) & (v ^ b->d); -} -void move_block(struct block_th *dst, const struct block_th *src) -{ - *dst = *src; -} -void xor_block(struct block_th *dst, const struct block_th *src) -{ - dst->a ^= src->a; - dst->b ^= src->b; - dst->c ^= src->c; - dst->d ^= src->d; -} -void load_block(struct block_th *dst, __global const struct block_g *src, - uint thread) -{ - dst->a = src->data[0 * THREADS_PER_LANE + thread]; - dst->b = src->data[1 * THREADS_PER_LANE + thread]; - dst->c = src->data[2 * THREADS_PER_LANE + thread]; - dst->d = src->data[3 * THREADS_PER_LANE + thread]; -} -void load_block_xor(struct block_th *dst, __global const struct block_g *src, - uint thread) -{ - dst->a ^= src->data[0 * THREADS_PER_LANE + thread]; - dst->b ^= src->data[1 * THREADS_PER_LANE + thread]; - dst->c ^= src->data[2 * THREADS_PER_LANE + thread]; - dst->d ^= src->data[3 * THREADS_PER_LANE + thread]; -} -void store_block(__global struct block_g *dst, const struct block_th *src, - uint thread) -{ - dst->data[0 * THREADS_PER_LANE + thread] = src->a; - dst->data[1 * THREADS_PER_LANE + thread] = src->b; - dst->data[2 * THREADS_PER_LANE + thread] = src->c; - dst->data[3 * THREADS_PER_LANE + thread] = src->d; -} -#ifdef cl_amd_media_ops -#pragma OPENCL EXTENSION cl_amd_media_ops : enable -ulong rotr64(ulong x, ulong n) -{ - uint lo = u64_lo(x); - uint hi = u64_hi(x); - uint r_lo, r_hi; - if (n < 32) { - r_lo = amd_bitalign(hi, lo, (uint)n); - r_hi = amd_bitalign(lo, hi, (uint)n); - } else { - r_lo = amd_bitalign(lo, hi, (uint)n - 32); - r_hi = amd_bitalign(hi, lo, (uint)n - 32); - } - return u64_build(r_hi, r_lo); -} -#else -ulong rotr64(ulong x, ulong n) -{ - return rotate(x, 64 - n); -} -#endif -ulong f(ulong x, ulong y) -{ - uint xlo = u64_lo(x); - uint ylo = u64_lo(y); - return x + y + 2 * u64_build(mul_hi(xlo, ylo), xlo * ylo); -} -void g(struct block_th *block) -{ - ulong a, b, c, d; - a = block->a; - b = block->b; - c = block->c; - d = block->d; - a = f(a, b); - d = rotr64(d ^ a, 32); - c = f(c, d); - b = rotr64(b ^ c, 24); - a = f(a, b); - d = rotr64(d ^ a, 16); - c = f(c, d); - b = rotr64(b ^ c, 63); - block->a = a; - block->b = b; - block->c = c; - block->d = d; -} -uint apply_shuffle_shift1(uint thread, uint idx) -{ - return (thread & 0x1c) | ((thread + idx) & 0x3); -} -uint apply_shuffle_unshift1(uint thread, uint idx) -{ - idx = (QWORDS_PER_THREAD - idx) % QWORDS_PER_THREAD; - return apply_shuffle_shift1(thread, idx); -} -uint apply_shuffle_shift2(uint thread, uint idx) -{ - uint lo = (thread & 0x1) | ((thread & 0x10) >> 3); - lo = (lo + idx) & 0x3; - return ((lo & 0x2) << 3) | (thread & 0xe) | (lo & 0x1); -} -uint apply_shuffle_unshift2(uint thread, uint idx) -{ - idx = (QWORDS_PER_THREAD - idx) % QWORDS_PER_THREAD; - - return apply_shuffle_shift2(thread, idx); -} -void shuffle_shift1(struct block_th *block, uint thread, - __local struct u64_shuffle_buf *buf) -{ - for (uint i = 0; i < QWORDS_PER_THREAD; i++) { - uint src_thr = apply_shuffle_shift1(thread, i); - - ulong v = block_th_get(block, i); - v = u64_shuffle(v, src_thr, thread, buf); - block_th_set(block, i, v); - } -} -void shuffle_unshift1(struct block_th *block, uint thread, - __local struct u64_shuffle_buf *buf) -{ - for (uint i = 0; i < QWORDS_PER_THREAD; i++) { - uint src_thr = apply_shuffle_unshift1(thread, i); - - ulong v = block_th_get(block, i); - v = u64_shuffle(v, src_thr, thread, buf); - block_th_set(block, i, v); - } -} -void shuffle_shift2(struct block_th *block, uint thread, - __local struct u64_shuffle_buf *buf) -{ - for (uint i = 0; i < QWORDS_PER_THREAD; i++) { - uint src_thr = apply_shuffle_shift2(thread, i); - - ulong v = block_th_get(block, i); - v = u64_shuffle(v, src_thr, thread, buf); - block_th_set(block, i, v); - } -} -void shuffle_unshift2(struct block_th *block, uint thread, - __local struct u64_shuffle_buf *buf) -{ - for (uint i = 0; i < QWORDS_PER_THREAD; i++) { - uint src_thr = apply_shuffle_unshift2(thread, i); - - ulong v = block_th_get(block, i); - v = u64_shuffle(v, src_thr, thread, buf); - block_th_set(block, i, v); - } -} -void transpose(struct block_th *block, uint thread, - __local struct u64_shuffle_buf *buf) -{ - uint thread_group = (thread & 0x0C) >> 2; - for (uint i = 1; i < QWORDS_PER_THREAD; i++) { - uint thr = (i << 2) ^ thread; - uint idx = thread_group ^ i; - - ulong v = block_th_get(block, idx); - v = u64_shuffle(v, thr, thread, buf); - block_th_set(block, idx, v); - } -} -void shuffle_block(struct block_th *block, uint thread, - __local struct u64_shuffle_buf *buf) -{ - transpose(block, thread, buf); - g(block); - shuffle_shift1(block, thread, buf); - g(block); - shuffle_unshift1(block, thread, buf); - transpose(block, thread, buf); - g(block); - shuffle_shift2(block, thread, buf); - g(block); - shuffle_unshift2(block, thread, buf); -} -void compute_ref_pos(uint lanes, uint segment_blocks, - uint pass, uint lane, uint slice, uint offset, - uint *ref_lane, uint *ref_index) -{ - uint lane_blocks = ARGON2_SYNC_POINTS * segment_blocks; - *ref_lane = *ref_lane % lanes; - uint base; - if (pass != 0) { - base = lane_blocks - segment_blocks; - } else { - if (slice == 0) { - *ref_lane = lane; - } - base = slice * segment_blocks; - } - uint ref_area_size = base + offset - 1; - if (*ref_lane != lane) { - ref_area_size = min(ref_area_size, base); - } - *ref_index = mul_hi(*ref_index, *ref_index); - *ref_index = ref_area_size - 1 - mul_hi(ref_area_size, *ref_index); - if (pass != 0 && slice != ARGON2_SYNC_POINTS - 1) { - *ref_index += (slice + 1) * segment_blocks; - if (*ref_index >= lane_blocks) { - *ref_index -= lane_blocks; - } - } -} -void argon2_core( - __global struct block_g *memory, __global struct block_g *mem_curr, - struct block_th *prev, struct block_th *tmp, - __local struct u64_shuffle_buf *shuffle_buf, uint lanes, - uint thread, uint pass, uint ref_index, uint ref_lane) -{ - __global struct block_g *mem_ref; - mem_ref = memory + ref_index * lanes + ref_lane; - -#if ALGO_VERSION == 0x10 - load_block_xor(prev, mem_ref, thread); - move_block(tmp, prev); -#else - if (pass != 0) - { - load_block(tmp, mem_curr, thread); - load_block_xor(prev, mem_ref, thread); - xor_block(tmp, prev); - } - else - { - load_block_xor(prev, mem_ref, thread); - move_block(tmp, prev); - } -#endif - - shuffle_block(prev, thread, shuffle_buf); - xor_block(prev, tmp); - store_block(mem_curr, prev, thread); -} -void argon2_step( - __global struct block_g *memory, __global struct block_g *mem_curr, - struct block_th *prev, struct block_th *tmp, struct block_th *addr, - __local struct u64_shuffle_buf *shuffle_buf, - uint lanes, uint segment_blocks, uint thread, uint *thread_input, - uint lane, uint pass, uint slice, uint offset) -{ - uint ref_index, ref_lane; - ulong v = u64_shuffle(prev->a, 0, thread, shuffle_buf); - ref_index = u64_lo(v); - ref_lane = u64_hi(v); - compute_ref_pos(lanes, segment_blocks, pass, lane, slice, offset, - &ref_lane, &ref_index); - argon2_core(memory, mem_curr, prev, tmp, shuffle_buf, lanes, thread, pass, - ref_index, ref_lane); -} -__kernel void argon2d_fill( - __local struct u64_shuffle_buf *shuffle_bufs, - __global struct block_g *memory, uint passes, uint lanes, - uint segment_blocks) -{ - uint job_id = get_global_id(1); - uint lane = get_global_id(0) / THREADS_PER_LANE; - uint warp = get_local_id(0) / THREADS_PER_LANE; - uint thread = get_local_id(0) % THREADS_PER_LANE; - __local struct u64_shuffle_buf *shuffle_buf = &shuffle_bufs[warp]; - uint lane_blocks = ARGON2_SYNC_POINTS * segment_blocks; - memory += (size_t)job_id * lanes * lane_blocks; - struct block_th prev, addr, tmp; - uint thread_input; - __global struct block_g *mem_lane = memory + lane; - __global struct block_g *mem_prev = mem_lane + 1 * lanes; - __global struct block_g *mem_curr = mem_lane + 2 * lanes; - load_block(&prev, mem_prev, thread); - uint skip = 2; - for (uint pass = 0; pass < passes; ++pass) { - for (uint slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) { - for (uint offset = 0; offset < segment_blocks; ++offset) { - if (skip > 0) { - --skip; - continue; - } - argon2_step(memory, mem_curr, &prev, &tmp, &addr, shuffle_buf, - lanes, segment_blocks, thread, &thread_input, - lane, pass, slice, offset); - mem_curr += lanes; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - mem_curr = mem_lane; - } - - __global uint* memLane = (__global uint*) ( memory + lanes * ( lane_blocks - 1 ) ) ; - - thread = get_local_id(0); - uint buf = 0; - for (uint i=0; i<8; i++){ - buf ^= memLane[thread+256*i]; - } - memLane[thread] = buf; -} -void g_shuffle( - const uint32_t r, - __local uint64_t* a, - __local uint64_t* b, - __local uint64_t* c, - __local uint64_t* d, - __local uint64_t* m1, - __local uint64_t* m2 ) -{ - *a = *a + *b + *m1; - *d = rotate64(*d ^ *a, 32); - *c = *c + *d; - *b = rotate64(*b ^ *c, 24); - *a = *a + *b + *m2; - *d = rotate64(*d ^ *a, 16); - *c = *c + *d; - *b = rotate64(*b ^ *c, 63); - -} -void load_block_fin( __global uint32_t* block, __local uint32_t* buffer, uint32_t idx){ - uint32_t i,j; - for(i=0;i<64;i++){ - j=idx+i*4; - buffer[j]=block[j]; - } -} -void blake2b_compress_final( - struct partialState* state, - __local uint64_t* m, - __local uint64_t* buffer, - uint32_t step, - uint32_t idx) -{ - uint64_t counter=(idx==0?step:0); - buffer[idx] = state->a; - buffer[idx+4] = state->b; - buffer[idx+8] = blake2b_IV[idx]; - if(idx==0) - buffer[idx+12] = blake2b_IV[4] ^ (step-1)*BLAKE2B_BLOCKBYTES+4; - else if(idx==2) - buffer[idx+12] = blake2b_IV[6] ^ (uint64_t) -1; - else - buffer[idx+12] = blake2b_IV[idx+4]; - barrier(CLK_LOCAL_MEM_FENCE); - for (uint32_t r = 0; r < 12; ++r) { - uint8_t ref1,ref2; - ref1 = sigma[r][0]>>16*idx; - ref2 = sigma[r][0]>>(16*idx+8); - g_shuffle(r, &buffer[idx], &buffer[idx+4], &buffer[idx+8], &buffer[idx+12], &m[ref1],&m[ref2] ); - ref1=sigma[r][1]>>16*idx; - ref2=sigma[r][1]>>(16*idx+8); - g_shuffle(r, &buffer[idx], &buffer[(idx+1)%4 +4], &buffer[(idx+2)%4+8], &buffer[(idx+3)%4+12], &m[ref1],&m[ref2] ); - } - state->a = state->a ^ buffer[idx] ^ buffer[idx+8]; - state->b = state->b ^ buffer[idx+4] ^ buffer[idx+12]; -} -void blake2b_compress( - struct partialState* state, - __local uint64_t* m, - __local uint64_t* buffer, - uint32_t step, - uint32_t idx) -{ - uint64_t counter=(idx==0?step:0); - buffer[idx] = state->a; - buffer[idx+4] = state->b; - buffer[idx+8] = blake2b_IV[idx]; - buffer[idx+12] = blake2b_IV[idx+4]^ counter*BLAKE2B_BLOCKBYTES; - barrier(CLK_LOCAL_MEM_FENCE); - for (uint32_t r = 0; r < 12; ++r) { - uint8_t ref1,ref2; - ref1 = sigma[r][0]>>16*idx; - ref2 = sigma[r][0]>>(16*idx+8); - g_shuffle(r, &buffer[idx], &buffer[idx+4], &buffer[idx+8], &buffer[idx+12], &m[ref1],&m[ref2] ); - ref1=sigma[r][1]>>16*idx; - ref2=sigma[r][1]>>(16*idx+8); - g_shuffle(r, &buffer[idx], &buffer[(idx+1)%4 +4], &buffer[(idx+2)%4+8], &buffer[(idx+3)%4+12], &m[ref1],&m[ref2] ); - } - state->a = state->a ^ buffer[idx] ^ buffer[idx+8]; - state->b = state->b ^ buffer[idx+4] ^ buffer[idx+12]; -} - -__kernel void argon2d_finalize( - __global struct block* memory, - __global uint32_t* output, - __local uint64_t* smem, - const uint32_t startNonce, - const uint64_t target - - ) -{ - uint32_t idx = get_local_id(0); - uint32_t jobId = get_group_id(1)*get_local_size(1)+get_local_id(1); - const uint32_t nonce = startNonce + jobId; - - __global uint32_t* memLane = (__global uint32_t*)((memory+jobId*ALGO_TOTAL_BLOCKS)+ALGO_LANES*(ALGO_LANE_LENGHT-1)); - __local uint64_t* input = &smem[129*get_local_id(1)]; - __local uint64_t* buffer= (__local uint64_t*)&smem[129*get_local_size(1)+get_local_id(1)*18]; - __local uint32_t* input_32 = (__local uint32_t*)input; - - load_block_fin(memLane,&input_32[1],idx); - - input_32[0]=32; - struct partialState state; - state.a = blake2b_Init_928[idx]; - state.b = blake2b_Init_928[idx+4]; - - blake2b_compress(&state,&input[0],buffer,1,idx); - blake2b_compress(&state,&input[16],buffer,2,idx); - blake2b_compress(&state,&input[32],buffer,3,idx); - blake2b_compress(&state,&input[48],buffer,4,idx); - blake2b_compress(&state,&input[64],buffer,5,idx); - blake2b_compress(&state,&input[80],buffer,6,idx); - blake2b_compress(&state,&input[96],buffer,7,idx); - blake2b_compress(&state,&input[112],buffer,8,idx); - - zero_buffer(&input_32[0], idx); - input_32[0] = input_32[256]; - - blake2b_compress_final(&state,&input[0],buffer,9,idx); - - barrier(CLK_LOCAL_MEM_FENCE); - - if ( state.a <= target && idx==3) { - output[0] = nonce; - } -} \ No newline at end of file diff --git a/src/crypto/argon2gpu/opencl/opencl.h b/src/crypto/argon2gpu/opencl/opencl.h deleted file mode 100644 index ed58b681de..0000000000 --- a/src/crypto/argon2gpu/opencl/opencl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef OPENCL_H -#define OPENCL_H - -/* Some compatibility hacks: */ -#define CL_USE_DEPRECATED_OPENCL_1_1_APIS -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS -#if defined(__APPLE__) || defined(__MACOSX) -#include -#else -#include -#endif -#undef CL_VERSION_2_0 - -/* Throw exceptions on errors: */ -#define __CL_ENABLE_EXCEPTIONS -/* Include local version of - * because not all platforms ship with it: */ -#include "crypto/argon2gpu/opencl/cl.hpp" - -#endif // OPENCL_H diff --git a/src/crypto/argon2gpu/opencl/processing-unit.cpp b/src/crypto/argon2gpu/opencl/processing-unit.cpp deleted file mode 100644 index 7f26c5f11b..0000000000 --- a/src/crypto/argon2gpu/opencl/processing-unit.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2015-2021 Ehsan Dalvand , Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#include "crypto/argon2gpu/opencl/processing-unit.h" - -namespace argon2gpu -{ -namespace opencl -{ - -ProcessingUnit::ProcessingUnit( - const ProgramContext* programContext, - const Argon2Params* params, - const Device* device, - std::size_t batchSize, - bool bySegment, - bool precomputeRefs) - : programContext(programContext), params(params), device(device), - runner(programContext, params, device, batchSize, bySegment, precomputeRefs), - bestLanesPerBlock(runner.getMinLanesPerBlock()), - bestJobsPerBlock(runner.getMinJobsPerBlock()){} - - -std::uint32_t ProcessingUnit::scanNonces( - const void* input, const std::uint32_t startNonce, - const std::uint64_t target) -{ - runner.init(input); - runner.fillFirstBlocks(startNonce); - runner.run(bestLanesPerBlock, bestJobsPerBlock); - runner.finalize(startNonce, target); - return runner.readResultNonce(); -} - -} // namespace opencl -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/opencl/processing-unit.h b/src/crypto/argon2gpu/opencl/processing-unit.h deleted file mode 100644 index b29f418f0d..0000000000 --- a/src/crypto/argon2gpu/opencl/processing-unit.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2015-2021 Ehsan Dalvand , Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#ifndef ARGON2_OPENCL_PROCESSINGUNIT_H -#define ARGON2_OPENCL_PROCESSINGUNIT_H - -#include -#include "crypto/argon2gpu/opencl/kernel-runner.h" - -#if defined(MAC_OSX) -#pragma clang diagnostic ignored "-Wunused-private-field" -#endif - -namespace argon2gpu -{ -namespace opencl -{ -class ProcessingUnit -{ -private: - const ProgramContext* programContext; - const Argon2Params* params; - const Device* device; - - KernelRunner runner; - std::uint32_t bestLanesPerBlock; - std::uint32_t bestJobsPerBlock; - -public: - std::size_t getBatchSize() const { return runner.getBatchSize(); } - - ProcessingUnit( - const ProgramContext* programContext, - const Argon2Params* params, - const Device* device, - std::size_t batchSize, - bool bySegment = true, - bool precomputeRefs = false); - - std::uint32_t scanNonces( - const void* input, const std::uint32_t startNonce, - const std::uint64_t target); -}; - -} // namespace opencl -} // namespace argon2gpu - -#endif // ARGON2_OPENCL_PROCESSINGUNIT_H diff --git a/src/crypto/argon2gpu/opencl/program-context.cpp b/src/crypto/argon2gpu/opencl/program-context.cpp deleted file mode 100644 index ea50c5d253..0000000000 --- a/src/crypto/argon2gpu/opencl/program-context.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "crypto/argon2gpu/opencl/program-context.h" -#include "crypto/argon2gpu/opencl/kernel-loader.h" - -namespace argon2gpu -{ -namespace opencl -{ -ProgramContext::ProgramContext( - const GlobalContext* globalContext, - const std::vector& devices, - Type type, - Version version) - : globalContext(globalContext), devices(), type(type), version(version) -{ - this->devices.reserve(devices.size()); - for (auto& device : devices) { - this->devices.push_back(device.getCLDevice()); - } - context = cl::Context(this->devices); - program = KernelLoader::loadArgon2Program(context, type, version); -} - -} // namespace opencl -} // namespace argon2gpu diff --git a/src/crypto/argon2gpu/opencl/program-context.h b/src/crypto/argon2gpu/opencl/program-context.h deleted file mode 100644 index b4fa19597e..0000000000 --- a/src/crypto/argon2gpu/opencl/program-context.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (C) 2015-2021 Łukasz Kurowski , Ondrej Mosnacek - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation: either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef ARGON2_OPENCL_PROGRAMCONTEXT_H -#define ARGON2_OPENCL_PROGRAMCONTEXT_H - -#include "crypto/argon2gpu/opencl/global-context.h" -#include "crypto/argon2gpu/common.h" - -namespace argon2gpu -{ -namespace opencl -{ -class ProgramContext -{ -private: - const GlobalContext* globalContext; - - std::vector devices; - cl::Context context; - cl::Program program; - - Type type; - Version version; - -public: - const GlobalContext* getGlobalContext() const { return globalContext; } - - const std::vector& getDevices() const { return devices; } - const cl::Context& getContext() const { return context; } - const cl::Program& getProgram() const { return program; } - - Type getArgon2Type() const { return type; } - Version getArgon2Version() const { return version; } - - ProgramContext( - const GlobalContext* globalContext, - const std::vector& devices, - Type type, - Version version); -}; - -} // namespace opencl -} // namespace argon2gpu - -#endif // ARGON2_OPENCL_PROGRAMCONTEXT_H From 43af20dba5b1d4606cf6ffda5a664ffbd1db5120 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <6098974-kittywhiskers@users.noreply.gitlab.com> Date: Sun, 16 May 2021 22:20:55 +0530 Subject: [PATCH 3/4] core: remove unused miner logic --- src/miner/impl/miner-cpu.cpp | 28 ------ src/miner/impl/miner-cpu.h | 36 ------- src/miner/impl/miner-gpu.cpp | 51 ---------- src/miner/impl/miner-gpu.h | 71 ------------- src/miner/internal/hash-rate-counter.cpp | 38 ------- src/miner/internal/hash-rate-counter.h | 46 --------- src/miner/internal/miner-base.cpp | 121 ----------------------- src/miner/internal/miner-base.h | 59 ----------- src/miner/internal/miner-context.cpp | 30 ------ src/miner/internal/miner-context.h | 102 ------------------- src/miner/internal/miners-controller.cpp | 103 ------------------- src/miner/internal/miners-controller.h | 120 ---------------------- src/miner/internal/miners-group.h | 49 --------- src/miner/internal/thread-group.h | 86 ---------------- src/miner/miner.cpp | 88 ----------------- src/miner/miner.h | 41 -------- 16 files changed, 1069 deletions(-) delete mode 100644 src/miner/impl/miner-cpu.cpp delete mode 100644 src/miner/impl/miner-cpu.h delete mode 100644 src/miner/impl/miner-gpu.cpp delete mode 100644 src/miner/impl/miner-gpu.h delete mode 100644 src/miner/internal/hash-rate-counter.cpp delete mode 100644 src/miner/internal/hash-rate-counter.h delete mode 100644 src/miner/internal/miner-base.cpp delete mode 100644 src/miner/internal/miner-base.h delete mode 100644 src/miner/internal/miner-context.cpp delete mode 100644 src/miner/internal/miner-context.h delete mode 100644 src/miner/internal/miners-controller.cpp delete mode 100644 src/miner/internal/miners-controller.h delete mode 100644 src/miner/internal/miners-group.h delete mode 100644 src/miner/internal/thread-group.h delete mode 100644 src/miner/miner.cpp delete mode 100644 src/miner/miner.h diff --git a/src/miner/impl/miner-cpu.cpp b/src/miner/impl/miner-cpu.cpp deleted file mode 100644 index 2bad4a1db6..0000000000 --- a/src/miner/impl/miner-cpu.cpp +++ /dev/null @@ -1,28 +0,0 @@ - -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "miner/impl/miner-cpu.h" -#include "primitives/block.h" - - -CPUMiner::CPUMiner(MinerContextRef ctx, std::size_t device_index) - : MinerBase(ctx, device_index){}; - -int64_t CPUMiner::TryMineBlock(CBlock& block) -{ - int64_t hashes_done = 0; - while (true) { - uint256 hash = block.GetHash(); - if (UintToArith256(hash) <= _hash_target) { - this->ProcessFoundSolution(block, hash); - break; - } - block.nNonce += 1; - hashes_done += 1; - if ((block.nNonce & 0xFF) == 0) - break; - } - return hashes_done; -} diff --git a/src/miner/impl/miner-cpu.h b/src/miner/impl/miner-cpu.h deleted file mode 100644 index 1ae3764481..0000000000 --- a/src/miner/impl/miner-cpu.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_MINER_IMPL_CPU_H -#define DYNAMIC_MINER_IMPL_CPU_H - -#include "miner/internal/miner-base.h" - - -/** - * Dynamic CPU miner. - */ -class CPUMiner final : public MinerBase -{ -public: - CPUMiner(MinerContextRef ctx, std::size_t device_index); - virtual ~CPUMiner() = default; - - static unsigned int TotalDevices() - { - // Currently it's one until I'll solve - // physical CPU vs CPU core count vs vCPU count - // and get it fixed on Windows, OSx and Linux - // Until then we will just use OS scheduler - // (not a priority at all; here just for GPU) - return 1; - }; - - virtual const char* DeviceName() override { return "CPU"; }; - -protected: - virtual int64_t TryMineBlock(CBlock& block) override; -}; - -#endif // DYNAMIC_MINER_IMPL_CPU_H diff --git a/src/miner/impl/miner-gpu.cpp b/src/miner/impl/miner-gpu.cpp deleted file mode 100644 index dc98f00fa1..0000000000 --- a/src/miner/impl/miner-gpu.cpp +++ /dev/null @@ -1,51 +0,0 @@ - -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "miner/impl/miner-gpu.h" -#include "util.h" - -#ifdef ENABLE_GPU -#include "miner/internal/miner-context.h" -#include "primitives/block.h" - - -GPUMiner::GPUMiner(MinerContextRef ctx, std::size_t device_index) - : MinerBase(ctx, device_index), - _global(), - _params((std::size_t)OUTPUT_BYTES, 2, 500, 8), - _device(_global.getAllDevices()[device_index]), - _context(&_global, {_device}, argon2gpu::ARGON2_D, argon2gpu::ARGON2_VERSION_10), - _batch_size_target(((_device.getTotalMemory() / 0x13F332) / 16) * 16), - _processing_unit(&_context, &_params, &_device, _batch_size_target, false, false) {} - -int64_t GPUMiner::TryMineBlock(CBlock& block) -{ - static unsigned char pblank[1]; - const auto _begin = BEGIN(block.nVersion); - const auto _end = END(block.nNonce); - const void* input = (_begin == _end ? pblank : static_cast(&_begin[0])); - const std::uint64_t device_target = ArithToUint256(_hash_target).GetUint64(3); - std::uint32_t start_nonce = block.nNonce; - - //Increase nNonce for the next batch - block.nNonce += _batch_size_target; - - std::uint32_t result_nonce = _processing_unit.scanNonces(input, start_nonce, device_target); - - if ( result_nonce < std::numeric_limits::max()){ - block.nNonce = result_nonce; - uint256 cpuHash = block.GetHash(); - if (UintToArith256(cpuHash) <= _hash_target) { - LogPrintf("Dynamic GPU Miner Found Nonce %u \n", block.nNonce); - this->ProcessFoundSolution(block, cpuHash); - }else{ - LogPrintf("Dynamic GPU Miner False Nonce %u \n", block.nNonce); - } - - } - return _batch_size_target; -} - -#endif // ENABLE_GPU diff --git a/src/miner/impl/miner-gpu.h b/src/miner/impl/miner-gpu.h deleted file mode 100644 index 1186cc6f03..0000000000 --- a/src/miner/impl/miner-gpu.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_MINER_IMPL_GPU_H -#define DYNAMIC_MINER_IMPL_GPU_H - -#if defined(HAVE_CONFIG_H) -#include "config/dynamic-config.h" -#endif - -#ifdef ENABLE_GPU -#include "crypto/argon2gpu/common.h" -#include "miner/internal/miner-base.h" - -#if HAVE_CUDA -#include "crypto/argon2gpu/cuda/cuda-exception.h" -#include "crypto/argon2gpu/cuda/processing-unit.h" -#else -#include "crypto/argon2gpu/opencl/opencl.h" -#include "crypto/argon2gpu/opencl/processing-unit.h" -#endif - - -class MinerContext; - -namespace gpu -{ -using Params = argon2gpu::Argon2Params; - -#if HAVE_CUDA -using ProcessingUnit = argon2gpu::cuda::ProcessingUnit; -using Device = argon2gpu::cuda::Device; -using Context = argon2gpu::cuda::GlobalContext; -using ProgramContext = argon2gpu::cuda::ProgramContext; -#else -using ProcessingUnit = argon2gpu::opencl::ProcessingUnit; -using Device = argon2gpu::opencl::Device; -using Context = argon2gpu::opencl::GlobalContext; -using ProgramContext = argon2gpu::opencl::ProgramContext; -#endif -} // namespace gpu - -class GPUMiner final : public MinerBase -{ -public: - GPUMiner(MinerContextRef ctx, std::size_t device_index); - virtual ~GPUMiner() = default; - - static unsigned int TotalDevices() - { - gpu::Context global; - return global.getAllDevices().size(); - }; - - virtual const char* DeviceName() override { return "GPU"; }; - -protected: - virtual int64_t TryMineBlock(CBlock& block) override; - -private: - gpu::Context _global; - gpu::Params _params; - gpu::Device _device; - gpu::ProgramContext _context; - std::size_t _batch_size_target; - gpu::ProcessingUnit _processing_unit; -}; - -#endif // ENABLE_GPU -#endif // DYNAMIC_MINER_IMPL_GPU_H diff --git a/src/miner/internal/hash-rate-counter.cpp b/src/miner/internal/hash-rate-counter.cpp deleted file mode 100644 index 6392070e7e..0000000000 --- a/src/miner/internal/hash-rate-counter.cpp +++ /dev/null @@ -1,38 +0,0 @@ - -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "miner/internal/hash-rate-counter.h" -#include "utiltime.h" - - -void HashRateCounter::Increment(int64_t amount) -{ - // Set start time if not set and return - if (_timer_start == 0) { - Reset(); - return; - } - // Increment hashes done - _count += amount; - if (_parent) { - _parent->Increment(amount); - } - // Ignore until at least 4 seconds passed - if (GetTimeMillis() - _timer_start < 4000) { - return; - } - // Set count per second - _count_per_sec = 1000.0 * _count / (GetTimeMillis() - _timer_start); - // Reset timer and count - _count = 0; - _timer_start = GetTimeMillis(); -} - -void HashRateCounter::Reset() -{ - _count = 0; - _count_per_sec = 0; - _timer_start = GetTimeMillis(); -} diff --git a/src/miner/internal/hash-rate-counter.h b/src/miner/internal/hash-rate-counter.h deleted file mode 100644 index db33711933..0000000000 --- a/src/miner/internal/hash-rate-counter.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_INTERNAL_HASH_RATE_COUNTER_H -#define DYNAMIC_INTERNAL_HASH_RATE_COUNTER_H - -#include -#include - - -struct HashRateCounter; -using HashRateCounterRef = std::shared_ptr; - -/** - * Hash rate counter struct. - */ -struct HashRateCounter : public std::enable_shared_from_this { -private: - std::atomic _count{0}; - std::atomic _timer_start{0}; - std::atomic _count_per_sec{0}; - - HashRateCounterRef _parent; - -public: - explicit HashRateCounter() : _parent(nullptr){}; - explicit HashRateCounter(HashRateCounterRef parent) : _parent(parent){}; - - // Returns hash rate per second - operator int64_t() { return _count_per_sec; }; - - // Creates new child counter - HashRateCounterRef MakeChild() { return std::make_shared(shared_from_this()); } - - // Increments counter - void Increment(int64_t amount); - - // Resets counter and timer - void Reset(); - - // Returns start time - int64_t start() const { return _timer_start; }; -}; - -#endif // DYNAMIC_INTERNAL_HASH_RATE_COUNTER_H diff --git a/src/miner/internal/miner-base.cpp b/src/miner/internal/miner-base.cpp deleted file mode 100644 index fbacbca0cf..0000000000 --- a/src/miner/internal/miner-base.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2016-2021 Duality Blockchain Solutions Developers -// Copyright (c) 2014-2021 The Dash Core Developers -// Copyright (c) 2009-2021 The Bitcoin Developers -// Copyright (c) 2009-2021 Satoshi Nakamoto -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "miner/internal/miner-base.h" -#include "chainparams.h" -#include "miner/miner-util.h" -#include "primitives/block.h" -#include "util.h" -#include "validation.h" -#include "validationinterface.h" - -#include - -#include - -MinerBase::MinerBase(MinerContextRef ctx, std::size_t device_index) - : _ctx(ctx), - _device_index(device_index) -{ - GetMainSignals().ScriptForMining(_coinbase_script); - // Throw an error if no script was provided. This can happen - // due to some internal error but also if the keypool is empty. - // In the latter case, already the pointer is NULL. - if (!_coinbase_script || _coinbase_script->reserveScript.empty()) { - throw std::runtime_error("No coinbase script available (mining requires a wallet)"); - } -}; - -void MinerBase::Loop() -{ - LogPrintf("DynamicMiner -- started on %s#%d\n", DeviceName(), _device_index); - SetThreadPriority(THREAD_PRIORITY_LOWEST); - RenameThread(tfm::format("dynamic-%s-miner-%d", DeviceName(), _device_index).data()); - - CBlock block; - CBlockIndex* chain_tip = nullptr; - int64_t block_time = 0; - std::shared_ptr block_template = {nullptr}; - - try { - while (true) { - // Update block and tip if changed - if (block_time != _ctx->shared->block_time()) { - // set new block template - block_template = _ctx->shared->block_template(); - block = block_template->block; - // set block reserve script - SetBlockPubkeyScript(block, _coinbase_script->reserveScript); - // set block flag only after template - // so we've waited for RecreateBlock - block_time = _ctx->shared->block_time(); - // block template chain tip - chain_tip = _ctx->shared->tip(); - } - // Make sure we have a tip - assert(chain_tip != nullptr); - assert(block_template != nullptr); - // Increment nonce - IncrementExtraNonce(block, chain_tip, _extra_nonce); - LogPrintf("DynamicMiner -- Running miner on device %s#%d with %u transactions in block (%u bytes)\n", DeviceName(), _device_index, block.vtx.size(), - GetSerializeSize(block, SER_NETWORK, PROTOCOL_VERSION)); - // set loop start for counter - _hash_target = arith_uint256().SetCompact(block.nBits); - // start mining the block - while (true) { - // try mining the block - int64_t hashes = TryMineBlock(block); - // increment hash statistics - _ctx->counter->Increment(hashes); - // Check for stop or if block needs to be rebuilt - boost::this_thread::interruption_point(); - // Check if block was recreated - if (block_time != _ctx->shared->block_time()) { - break; - } - // Recreate block if nonce too big - if (block.nNonce >= 0xffff0000) { - _ctx->shared->RecreateBlock(); - break; - } - // Update block time - if (UpdateTime(block, _ctx->chainparams().GetConsensus(), chain_tip) < 0) { - // Recreate the block if the clock has run backwards, - // so that we can use the correct time. - _ctx->shared->RecreateBlock(); - break; - } - if (_ctx->chainparams().GetConsensus().fPowAllowMinDifficultyBlocks) { - // Changing block.nTime can change work required on testnet: - _hash_target.SetCompact(block.nBits); - } - } - } - } catch (const boost::thread_interrupted&) { - LogPrintf("DynamicMiner%s -- terminated\n", DeviceName()); - throw; - } catch (const std::runtime_error& e) { - LogPrintf("DynamicMiner%s -- runtime error: %s\n", DeviceName(), e.what()); - return; - } -} - -void MinerBase::ProcessFoundSolution(const CBlock& block, const uint256& hash) -{ - // Found a solution - SetThreadPriority(THREAD_PRIORITY_NORMAL); - LogPrintf("DynamicMiner%s:\n proof-of-work found \n hash: %s \ntarget: %s\n", DeviceName(), hash.GetHex(), _hash_target.GetHex()); - ProcessBlockFound(block, _ctx->chainparams()); - SetThreadPriority(THREAD_PRIORITY_LOWEST); - _coinbase_script->KeepScript(); - - // TODO: it needs to close all miners - // In regression test mode, stop mining after a block is found. - if (_ctx->chainparams().MineBlocksOnDemand()) { - throw boost::thread_interrupted(); - } -} diff --git a/src/miner/internal/miner-base.h b/src/miner/internal/miner-base.h deleted file mode 100644 index 7711d18253..0000000000 --- a/src/miner/internal/miner-base.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_INTERNAL_MINER_BASE_H -#define DYNAMIC_INTERNAL_MINER_BASE_H - -#include "arith_uint256.h" -#include "miner/internal/miner-context.h" - -#include - -class CBlock; -class CReserveScript; - -/** - * Base miner class for CPU and GPU miner. - */ -class MinerBase -{ -public: - MinerBase(MinerContextRef ctx, std::size_t device_index); - virtual ~MinerBase() = default; - - // Starts miner loop - void Loop(); - - // Starts miner loop - void operator()() { Loop(); }; - - // Returns miner device name - virtual const char* DeviceName() = 0; - -protected: - // Processes a new found solution - void ProcessFoundSolution(const CBlock& block, const uint256& hash); - - // tries to mine a block - virtual int64_t TryMineBlock(CBlock& block) = 0; - - // Solution must be lower or equal to - arith_uint256 _hash_target = 0; - - // Miner context - MinerContextRef _ctx; - -private: - // Miner device index - std::size_t _device_index; - - // Extra block nonce - unsigned int _extra_nonce = 0; - - // Miner coinbase script - // Includes wallet payout key - std::shared_ptr _coinbase_script{nullptr}; -}; - -#endif // DYNAMIC_INTERNAL_MINER_BASE_H diff --git a/src/miner/internal/miner-context.cpp b/src/miner/internal/miner-context.cpp deleted file mode 100644 index de41091f14..0000000000 --- a/src/miner/internal/miner-context.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2016-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "miner/internal/miner-context.h" -#include "miner/miner-util.h" -#include "txmempool.h" -#include "validation.h" - -MinerContext::MinerContext(const CChainParams& chainparams_, CConnman& connman_) - : counter(std::make_shared()), - shared(std::make_shared(chainparams_, connman_)){}; - -MinerContext::MinerContext(MinerSharedContextRef shared_, HashRateCounterRef counter_) - : counter(counter_), shared(shared_){}; - -void MinerSharedContext::RecreateBlock() -{ - // Then we acquire unique lock so that miners wait - // for the new block to be created - boost::unique_lock guard(_mutex); - uint32_t txn_time = mempool.GetTransactionsUpdated(); - // pass if nothing changed - if (_chain_tip == chainActive.Tip() && _last_txn == txn_time) - return; - _chain_tip = chainActive.Tip(); - _block_time = GetTime(); - _block_template = CreateNewBlock(chainparams); - _last_txn = txn_time; -} diff --git a/src/miner/internal/miner-context.h b/src/miner/internal/miner-context.h deleted file mode 100644 index 50b5bf4ec9..0000000000 --- a/src/miner/internal/miner-context.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_INTERNAL_MINER_CONTEXT_H -#define DYNAMIC_INTERNAL_MINER_CONTEXT_H - -#include "miner/internal/hash-rate-counter.h" - -#include -#include - -#include - -class CBlock; -class CChainParams; -class CConnman; -class CBlockIndex; -struct CBlockTemplate; - -class MinerBase; -class MinerContext; -class MinerSignals; -class MinersController; - -/** Miner context shared_ptr */ -using MinerContextRef = std::shared_ptr; - -struct MinerSharedContext { -public: - const CChainParams& chainparams; - CConnman& connman; - - MinerSharedContext(const CChainParams& chainparams_, CConnman& connman_) - : chainparams(chainparams_), connman(connman_){}; - - // Returns chain tip of current block template - CBlockIndex* tip() const { return _chain_tip; } - - // Returns miner block template creation time - int64_t block_time() const { return _block_time; } - - // Returns time of last transaction in the block - uint32_t last_txn() const { return _last_txn; } - - // Returns miner block template - std::shared_ptr block_template() - { - boost::shared_lock guard(_mutex); - return _block_template; - } - -protected: - friend class MinerBase; - friend class MinerSignals; - friend class MinersController; - - // recreates miners block template - void RecreateBlock(); - -private: - // current block chain tip - std::atomic _chain_tip{nullptr}; - // atomic flag incremented on recreated block - std::atomic _block_time{0}; - // last transaction update time - std::atomic _last_txn{0}; - // shared block template for miners - std::shared_ptr _block_template{nullptr}; - // mutex protecting multiple threads recreating block - mutable boost::shared_mutex _mutex; -}; - -using MinerSharedContextRef = std::shared_ptr; - -/** - * Miner context. - */ -class MinerContext -{ -public: - HashRateCounterRef counter; - MinerSharedContextRef shared; - - MinerContext(const CChainParams& chainparams_, CConnman& connman_); - MinerContext(MinerSharedContextRef shared_, HashRateCounterRef counter_); - - // Constructs child context - explicit MinerContext(const MinerContext* ctx_) - : MinerContext(ctx_->shared, ctx_->counter->MakeChild()){}; - - // Creates child context for group or miner - MinerContextRef MakeChild() const { return std::make_shared(this); } - - // Connection manager - CConnman& connman() const { return shared->connman; } - - // Chain parameters - const CChainParams& chainparams() const { return shared->chainparams; } -}; - -#endif // DYNAMIC_INTERNAL_MINER_CONTEXT_H diff --git a/src/miner/internal/miners-controller.cpp b/src/miner/internal/miners-controller.cpp deleted file mode 100644 index 13a1d1ea30..0000000000 --- a/src/miner/internal/miners-controller.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "miner/internal/miners-controller.h" -#include "chain.h" -#include "miner/internal/miner-context.h" -#include "miner/miner-util.h" -#include "net.h" -#include "validation.h" -#include "validationinterface.h" - - -MinersController::MinersController(const CChainParams& chainparams, CConnman& connman) - : MinersController(std::make_shared(chainparams, connman)){}; - -MinersController::MinersController(MinerContextRef ctx) - : _ctx(ctx), - _group_cpu(_ctx->MakeChild()), -#ifdef ENABLE_GPU - _group_gpu(_ctx->MakeChild()), -#endif // ENABLE_GPU - _connected(!_ctx->chainparams().MiningRequiresPeers()){}; - -void MinersController::Start() -{ - _connected = _ctx->connman().GetNodeCount(CConnman::CONNECTIONS_ALL) >= 2; - _enable_start = true; - _signals = std::make_shared(this); - // initialize block template - _ctx->shared->RecreateBlock(); - LogPrintf("MinersController::Start can_start = %v\n", can_start()); - - if (can_start()) { - _group_cpu.Start(); -#ifdef ENABLE_GPU - _group_gpu.Start(); -#endif // ENABLE_GPU - } -}; - -void MinersController::Shutdown() -{ - _enable_start = false; - _signals = nullptr; // remove signals receiver - - _group_cpu.Shutdown(); -#ifdef ENABLE_GPU - _group_gpu.Shutdown(); -#endif // ENABLE_GPU -}; - -int64_t MinersController::GetHashRate() const -{ -#ifdef ENABLE_GPU - return _group_gpu.GetHashRate() + _group_cpu.GetHashRate(); -#else - return _group_cpu.GetHashRate(); -#endif // ENABLE_GPU -} - -MinerSignals::MinerSignals(MinersController* ctr) - : _ctr(ctr), - _node(_ctr->ctx()->connman().ConnectSignalNode(boost::bind(&MinerSignals::NotifyNode, this, _1))), - _block(GetMainSignals().UpdatedBlockTip.connect(boost::bind(&MinerSignals::NotifyBlock, this, _1, _2, _3))), - _txn(GetMainSignals().SyncTransaction.connect(boost::bind(&MinerSignals::NotifyTransaction, this, _1, _2, _3))){}; - -void MinerSignals::NotifyNode(const CNode* node) -{ - if (_ctr->ctx()->connman().GetNodeCount(CConnman::CONNECTIONS_ALL) >= 2) { - _ctr->_connected = true; - } else if (_ctr->ctx()->connman().GetNodeCount(CConnman::CONNECTIONS_ALL) <= 1) { - _ctr->_connected = false; - } -}; - -void MinerSignals::NotifyBlock(const CBlockIndex* index_new, const CBlockIndex* index_fork, bool fInitialDownload) -{ - if (fInitialDownload) - return; - // Compare with current tip (checks for unexpected behaviour or old block) - if (index_new != chainActive.Tip()) - return; - // Create new block template for miners - _ctr->_ctx->shared->RecreateBlock(); - // start miners - if (_ctr->can_start()) { - _ctr->_group_cpu.Start(); -#ifdef ENABLE_GPU - _ctr->_group_gpu.Start(); -#endif // ENABLE_GPU - } -}; - -void MinerSignals::NotifyTransaction(const CTransaction& txn, const CBlockIndex* index, int posInBlock) -{ - // check if blockchain has synced, has more than 1 peer and is enabled before recreating blocks - if (IsInitialBlockDownload() || !_ctr->can_start()) - return; - if (GetTime() - _ctr->_ctx->shared->last_txn() > 60) { - _ctr->_ctx->shared->RecreateBlock(); - } -}; diff --git a/src/miner/internal/miners-controller.h b/src/miner/internal/miners-controller.h deleted file mode 100644 index 219fe79e8a..0000000000 --- a/src/miner/internal/miners-controller.h +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_INTERNAL_MINERS_CONTROLLER_H -#define DYNAMIC_INTERNAL_MINERS_CONTROLLER_H - -#include - -#include "chainparams.h" -#include "miner/impl/miner-cpu.h" -#include "miner/impl/miner-gpu.h" -#include "miner/internal/miners-group.h" - -class CNode; -class CConnman; -class CReserveScript; -class CChainParams; -class CBlockIndex; -class CTransaction; -struct CBlockTemplate; - -class MinerSignals; -class MinersController; - -void ConnectMinerSignals(MinersController*); - -/** - * Miner controller for both CPU and GPU threads. - */ -class MinersController -{ -public: - MinersController(const CChainParams& chainparams, CConnman& connman); - MinersController(MinerContextRef ctx); - virtual ~MinersController() = default; - - // Starts miners - void Start(); - - // Shuts down all miner threads - void Shutdown(); - - // Gets combined hash rate of GPU and CPU - int64_t GetHashRate() const; - - // Returns CPU miners thread group - MinersThreadGroup& group_cpu() { return _group_cpu; } - -#ifdef ENABLE_GPU - // Returns GPU miners thread group - MinersThreadGroup& group_gpu() - { - return _group_gpu; - } -#endif // ENABLE_GPU - -protected: - // Returns shared miner context - MinerContextRef ctx() const { return _ctx; } - - // Starts miner only if can - void StartIfEnabled(); - - // Returns true if enabled, connected and has block. - bool can_start() const { return _connected && _enable_start && _ctx->shared->block_template(); } - - // Miner signals class - friend class MinerSignals; - - // Optional miner signals - // It can be empty when miner is shutdown - std::shared_ptr _signals{nullptr}; - - // Miner context - MinerContextRef _ctx; - // Miner CPU Thread group - MinersThreadGroup _group_cpu; -#ifdef ENABLE_GPU - // Miner GPU Thread group - MinersThreadGroup _group_gpu; -#endif // ENABLE_GPU - - // Set to true when at least one node is connected - bool _connected = false; - - // Set to true when user requested start - bool _enable_start = false; - - // Time of last transaction signal - int64_t _last_txn_time = 0; - // Time of last time block template was created - int64_t _last_sync_time = 0; -}; - -class MinerSignals -{ -private: - MinersController* _ctr; - - boost::signals2::scoped_connection _node; - boost::signals2::scoped_connection _block; - boost::signals2::scoped_connection _txn; - -public: - MinerSignals(MinersController* _ctr); - virtual ~MinerSignals() = default; - -private: - // Handles new node connection - virtual void NotifyNode(const CNode* node); - - // Handles updated blockchain tip - virtual void NotifyBlock(const CBlockIndex* pindexNew, const CBlockIndex* pindexFork, bool fInitialDownload); - - // Handles new transaction - virtual void NotifyTransaction(const CTransaction& txn, const CBlockIndex* pindex, int posInBlock); -}; - -#endif // DYNAMIC_INTERNAL_MINERS_CONTROLLER_H diff --git a/src/miner/internal/miners-group.h b/src/miner/internal/miners-group.h deleted file mode 100644 index 6e9a5b875a..0000000000 --- a/src/miner/internal/miners-group.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_INTERNAL_MINERS_GROUP_H -#define DYNAMIC_INTERNAL_MINERS_GROUP_H - -#include "miner/internal/miner-context.h" -#include "miner/internal/thread-group.h" - - -/** - * Miners group threads controller. - */ -template -class MinersThreadGroup : public ThreadGroup -{ -public: - MinersThreadGroup(MinerContextRef ctx) - : ThreadGroup(ctx){}; - - // Shuts down all miner threads - void Shutdown() - { - // Shutdown all threads - SetSize(0); - // It's not updated and instead of reading - // system time and comparing with last update - // it is just reset when all threads are shut - this->_ctx->counter->Reset(); - }; - - // Sets amount of threads - void SetSize(uint8_t size) - { - // Set thread group size - ThreadGroup::SetSize(size); - // Reset hash rate counter - if (size == 0) { - this->_ctx->counter->Reset(); - } - }; - - // Gets hash rate of all threads in the group - int64_t GetHashRate() const { return *this->_ctx->counter; }; -}; - - -#endif // DYNAMIC_INTERNAL_MINERS_GROUP_H diff --git a/src/miner/internal/thread-group.h b/src/miner/internal/thread-group.h deleted file mode 100644 index 521d0f68e9..0000000000 --- a/src/miner/internal/thread-group.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_INTERNAL_THREAD_GROUP_H -#define DYNAMIC_INTERNAL_THREAD_GROUP_H - -#include -#include -#include - -/** - * Miner threads controller. - * Separate object for CPU and GPU. - */ -template -class ThreadGroup -{ -public: - explicit ThreadGroup(Context ctx); - - // Starts set amount of target threads - void Start() - { - boost::unique_lock guard(_mutex); - SyncGroupTarget(); - }; - - // Shuts down all threads - void Shutdown() { SetSize(0); }; - - // Sets amount of threads - void SetSize(uint8_t size) - { - boost::unique_lock guard(_mutex); - // sync only if lowering - // requires sync if higher - if (_threads.size() > size) { - SyncGroupTarget(); - _target_threads = size; - } else { - _target_threads = size; - } - }; - - // Size of a thread group - uint8_t size() const { return _target_threads; } - -protected: - Context _ctx; - -private: - // Starts or shutdowns threads to meet the target - // Requires a mutex lock before call - void SyncGroupTarget(); - - size_t _devices; - uint8_t _target_threads = 0; - std::vector > _threads; - mutable boost::shared_mutex _mutex; -}; - -/** Miners device group class constructor */ -template -ThreadGroup::ThreadGroup(Context ctx) - : _ctx(ctx), _devices(T::TotalDevices()){}; - -template -void ThreadGroup::SyncGroupTarget() -{ - size_t current; - while ((current = _threads.size()) != _target_threads) { - if (current < _target_threads) { - auto miner = std::shared_ptr(new T(_ctx->MakeChild(), current % _devices)); - _threads.push_back(std::make_shared([miner] { - (*miner)(); - })); - } else { - std::shared_ptr thread = _threads.back(); - _threads.pop_back(); - thread->interrupt(); - } - } -}; - -#endif // DYNAMIC_INTERNAL_THREAD_GROUP_H diff --git a/src/miner/miner.cpp b/src/miner/miner.cpp deleted file mode 100644 index 0292b5556e..0000000000 --- a/src/miner/miner.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2016-2021 Duality Blockchain Solutions Developers -// Copyright (c) 2014-2021 The Dash Core Developers -// Copyright (c) 2009-2021 The Bitcoin Developers -// Copyright (c) 2009-2021 Satoshi Nakamoto -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "miner/miner.h" -#include "chainparams.h" -#include "consensus/consensus.h" -#include "consensus/validation.h" -#include "miner/internal/miners-controller.h" -#include "net.h" -#include "primitives/transaction.h" -#include "utilmoneystr.h" -#include "validation.h" -#include "validationinterface.h" - -void InitMiners(const CChainParams& chainparams, CConnman& connman) -{ - if (!gMiners) - gMiners.reset(new MinersController(chainparams, connman)); -} - -void StartMiners() -{ - assert(gMiners); - gMiners->Start(); -}; - -void ShutdownMiners() -{ - if (gMiners) - gMiners->Shutdown(); -}; - -void ShutdownCPUMiners() -{ - if (gMiners) - gMiners->group_cpu().Shutdown(); -}; - -void ShutdownGPUMiners() -{ -#ifdef ENABLE_GPU - if (gMiners) - gMiners->group_gpu().Shutdown(); -#endif // ENABLE_GPU -}; - -int64_t GetHashRate() -{ - if (gMiners) - return gMiners->GetHashRate(); - return 0; -}; - -int64_t GetCPUHashRate() -{ - if (gMiners) - return gMiners->group_cpu().GetHashRate(); - return 0; -}; - -int64_t GetGPUHashRate() -{ -#ifdef ENABLE_GPU - if (gMiners) - return gMiners->group_gpu().GetHashRate(); -#endif // ENABLE_GPU - return 0; -}; - -void SetCPUMinerThreads(uint8_t target) -{ - assert(gMiners); - gMiners->group_cpu().SetSize(target); -}; - -void SetGPUMinerThreads(uint8_t target) -{ -#ifdef ENABLE_GPU - assert(gMiners); - gMiners->group_gpu().SetSize(target); -#endif // ENABLE_GPU -}; - -std::unique_ptr gMiners = {nullptr}; diff --git a/src/miner/miner.h b/src/miner/miner.h deleted file mode 100644 index 7916f7f01d..0000000000 --- a/src/miner/miner.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2019-2021 Duality Blockchain Solutions Developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef DYNAMIC_MINER_H -#define DYNAMIC_MINER_H - -#include "miner-util.h" // IWYU pragma: keep - -class CConnman; -class CChainParams; - -class MinersController; - -/** It's constructed and set in init.cpp */ -extern std::unique_ptr gMiners; - -/** Initializes miners controller */ -void InitMiners(const CChainParams& chainparams, CConnman& connman); -/** Start all miner threads */ -void StartMiners(); -/** Shuts down all miner threads */ -void ShutdownMiners(); -/** Shuts down all CPU miner threads */ -void ShutdownCPUMiners(); -/** Shuts down all GPU miner threads */ -void ShutdownGPUMiners(); - -/** Gets hash rate of GPU and CPU */ -int64_t GetHashRate(); -/** Gets hash rate of CPU */ -int64_t GetCPUHashRate(); -/** Gets hash rate of GPU */ -int64_t GetGPUHashRate(); - -/** Sets amount of CPU miner threads */ -void SetCPUMinerThreads(uint8_t target); -/** Sets amount of GPU miner threads */ -void SetGPUMinerThreads(uint8_t target); - -#endif // DYNAMIC_MINER_H From 31d9f1a7e6d1e35a1a7b359d00d9416bdf3b8043 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <6098974-kittywhiskers@users.noreply.gitlab.com> Date: Sun, 16 May 2021 22:21:01 +0530 Subject: [PATCH 4/4] core: remove unused gui elements --- src/qt/forms/miningpage.ui | 998 ------------------------------------- src/qt/miningpage.cpp | 440 ---------------- src/qt/miningpage.h | 77 --- 3 files changed, 1515 deletions(-) delete mode 100644 src/qt/forms/miningpage.ui delete mode 100644 src/qt/miningpage.cpp delete mode 100644 src/qt/miningpage.h diff --git a/src/qt/forms/miningpage.ui b/src/qt/forms/miningpage.ui deleted file mode 100644 index e9215f887a..0000000000 --- a/src/qt/forms/miningpage.ui +++ /dev/null @@ -1,998 +0,0 @@ - - - MiningPage - - - - 0 - 0 - 860 - 474 - - - - Form - - - - - - - - - - - - - - - - - - QTabWidget:tab-bar { alignment: left; } -QTabBar { alignment: left; } - - - - 0 - - - - true - - - - 0 - 0 - - - - - 0 - 0 - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - CPU - - - - - - - - - 250 - 0 - - - - QSlider::groove:horizontal { - border: 1px solid #bbb; - background: white; - height: 10px; - border-radius: 4px; - } - - QSlider::sub-page:horizontal { - background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, - stop: 0 #66e, stop: 1 #bbf); - background: qlineargradient(x1: 0, y1: 0.2, x2: 1, y2: 1, - stop: 0 #bbf, stop: 1 #520072); - border: 1px solid #777; - height: 10px; - border-radius: 4px; - } - - QSlider::add-page:horizontal { - background: #fff; - border: 1px solid #777; - height: 10px; - border-radius: 4px; - } - - QSlider::handle:horizontal { - background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #eee, stop:1 #520072); - border: 1px solid #777; - width: 13px; - margin-top: -2px; - margin-bottom: -2px; - border-radius: 4px; - } - - QSlider::handle:horizontal:hover { - background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #fff, stop:1 #ddd); - border: 1px solid #444; - border-radius: 4px; - } - - QSlider::sub-page:horizontal:disabled { - background: #bbb; - border-color: #999; - } - - QSlider::add-page:horizontal:disabled { - background: #eee; - border-color: #999; - } - - QSlider::handle:horizontal:disabled { - background: #eee; - border: 1px solid #aaa; - border-radius: 4px; - } - - - Qt::Horizontal - - - - - - - - - Qt::Vertical - - - - 10 - 20 - - - - - - - - - - - 75 - true - - - - 5 m - - - - - - - QPushButton { /* Global Button Style */ -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:0; -border-radius:3px; -color:#ffffff; -font-size:12px; -font-weight:bold; -padding-left:25px; -padding-right:25px; -padding-top:5px; -padding-bottom:5px; -} - -QPushButton:hover { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #BA77D4, stop: .1 #9F2DCC, stop: .95 #9F2DCC, stop: 1 #7400A1); -} - -QPushButton:focus { -border:none; -outline:none; -} - -QPushButton:pressed { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:1px solid #520072; -} - - - Clear - - - - - - - - - - - - 0 - 0 - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - - - - - Qt::Vertical - - - - 10 - 200 - - - - - - - - - - - - QPushButton { /* Global Button Style */ -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:0; -border-radius:3px; -color:#ffffff; -font-size:12px; -font-weight:bold; -padding-left:25px; -padding-right:25px; -padding-top:5px; -padding-bottom:5px; -} - -QPushButton:hover { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #BA77D4, stop: .1 #9F2DCC, stop: .95 #9F2DCC, stop: 1 #7400A1); -} - -QPushButton:focus { -border:none; -outline:none; -} - -QPushButton:pressed { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:1px solid #520072; -} - - - Start mining - - - - - - - Qt::Vertical - - - - 20 - 100 - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Show Hash Meter Graph - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - 4 - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - - - - - - 250 - 0 - - - - QSlider::groove:horizontal { -border: 1px solid #bbb; -background: white; -height: 10px; -border-radius: 4px; -} - -QSlider::sub-page:horizontal { -background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, - stop: 0 #66e, stop: 1 #bbf); -background: qlineargradient(x1: 0, y1: 0.2, x2: 1, y2: 1, - stop: 0 #bbf, stop: 1 #520072); -border: 1px solid #777; -height: 10px; -border-radius: 4px; -} - -QSlider::add-page:horizontal { -background: #fff; -border: 1px solid #777; -height: 10px; -border-radius: 4px; -} - -QSlider::handle:horizontal { -background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #eee, stop:1 #520072); -border: 1px solid #777; -width: 13px; -margin-top: -2px; -margin-bottom: -2px; -border-radius: 4px; -} - -QSlider::handle:horizontal:hover { -background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #fff, stop:1 #ddd); -border: 1px solid #444; -border-radius: 4px; -} - -QSlider::sub-page:horizontal:disabled { -background: #bbb; -border-color: #999; -} - -QSlider::add-page:horizontal:disabled { -background: #eee; -border-color: #999; -} - -QSlider::handle:horizontal:disabled { -background: #eee; -border: 1px solid #aaa; -border-radius: 4px; -} - - - Qt::Horizontal - - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Average spacing between your blocks: - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Your hashrate (built-in miner): - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Network hashrate: - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Number of CPU threads to use: - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - 0 - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - ? - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - - - - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - - - - 0 - 0 - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - GPU - - - - - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - 4 - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - - - - - - 250 - 0 - - - - QSlider::groove:horizontal { -border: 1px solid #bbb; -background: white; -height: 10px; -border-radius: 4px; -} - -QSlider::sub-page:horizontal { -background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, - stop: 0 #66e, stop: 1 #bbf); -background: qlineargradient(x1: 0, y1: 0.2, x2: 1, y2: 1, - stop: 0 #bbf, stop: 1 #520072); -border: 1px solid #777; -height: 10px; -border-radius: 4px; -} - -QSlider::add-page:horizontal { -background: #fff; -border: 1px solid #777; -height: 10px; -border-radius: 4px; -} - -QSlider::handle:horizontal { -background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #eee, stop:1 #520072); -border: 1px solid #777; -width: 13px; -margin-top: -2px; -margin-bottom: -2px; -border-radius: 4px; -} - -QSlider::handle:horizontal:hover { -background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #fff, stop:1 #ddd); -border: 1px solid #444; -border-radius: 4px; -} - -QSlider::sub-page:horizontal:disabled { -background: #bbb; -border-color: #999; -} - -QSlider::add-page:horizontal:disabled { -background: #eee; -border-color: #999; -} - -QSlider::handle:horizontal:disabled { -background: #eee; -border: 1px solid #aaa; -border-radius: 4px; -} - - - Qt::Horizontal - - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Average spacing between your blocks: - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Your hashrate (built-in miner): - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Network hashrate: - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Number of GPU devices to use: - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - 0 - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - ? - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - - - - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - - - - QPushButton { /* Global Button Style */ -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:0; -border-radius:3px; -color:#ffffff; -font-size:12px; -font-weight:bold; -padding-left:25px; -padding-right:25px; -padding-top:5px; -padding-bottom:5px; -} - -QPushButton:hover { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #BA77D4, stop: .1 #9F2DCC, stop: .95 #9F2DCC, stop: 1 #7400A1); -} - -QPushButton:focus { -border:none; -outline:none; -} - -QPushButton:pressed { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:1px solid #520072; -} - - - Start mining - - - - - - - Qt::Vertical - - - - 20 - 100 - - - - - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - Show Hash Meter Graph - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - - - - - 0 - 0 - - - - background-color: rgb(0, 0, 0); -color: rgb(255, 255, 255); - - - - - - - Qt::Vertical - - - - 10 - 200 - - - - - - - - - - - - - 250 - 0 - - - - QSlider::groove:horizontal { - border: 1px solid #bbb; - background: white; - height: 10px; - border-radius: 4px; - } - - QSlider::sub-page:horizontal { - background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, - stop: 0 #66e, stop: 1 #bbf); - background: qlineargradient(x1: 0, y1: 0.2, x2: 1, y2: 1, - stop: 0 #bbf, stop: 1 #520072); - border: 1px solid #777; - height: 10px; - border-radius: 4px; - } - - QSlider::add-page:horizontal { - background: #fff; - border: 1px solid #777; - height: 10px; - border-radius: 4px; - } - - QSlider::handle:horizontal { - background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #eee, stop:1 #520072); - border: 1px solid #777; - width: 13px; - margin-top: -2px; - margin-bottom: -2px; - border-radius: 4px; - } - - QSlider::handle:horizontal:hover { - background: qlineargradient(x1:0, y1:0, x2:1, y2:1, - stop:0 #fff, stop:1 #ddd); - border: 1px solid #444; - border-radius: 4px; - } - - QSlider::sub-page:horizontal:disabled { - background: #bbb; - border-color: #999; - } - - QSlider::add-page:horizontal:disabled { - background: #eee; - border-color: #999; - } - - QSlider::handle:horizontal:disabled { - background: #eee; - border: 1px solid #aaa; - border-radius: 4px; - } - - - Qt::Horizontal - - - - - - - - - Qt::Vertical - - - - 10 - 20 - - - - - - - - - - - 75 - true - - - - 5 m - - - - - - - QPushButton { /* Global Button Style */ -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:0; -border-radius:3px; -color:#ffffff; -font-size:12px; -font-weight:bold; -padding-left:25px; -padding-right:25px; -padding-top:5px; -padding-bottom:5px; -} - -QPushButton:hover { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #BA77D4, stop: .1 #9F2DCC, stop: .95 #9F2DCC, stop: 1 #7400A1); -} - -QPushButton:focus { -border:none; -outline:none; -} - -QPushButton:pressed { -background-color:qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: .01 #8F5BA3, stop: .1 #771F99, stop: .95 #771F99, stop: 1 #520072); -border:1px solid #520072; -} - - - Clear - - - - - - - - - - - - - - - - - - - - - - - HashRateGraphWidget - QWidget -
hashrategraphwidget.h
- 1 - - clear() - -
-
- - -
diff --git a/src/qt/miningpage.cpp b/src/qt/miningpage.cpp deleted file mode 100644 index 19ff14226e..0000000000 --- a/src/qt/miningpage.cpp +++ /dev/null @@ -1,440 +0,0 @@ -#include "miningpage.h" -#include "ui_miningpage.h" - -#include "dynode-sync.h" -#include "guiutil.h" -#include "miner/miner.h" -#include "net.h" -#include "util.h" -#include "utiltime.h" -#include "validation.h" -#include "walletmodel.h" - -#include -#include - -MiningPage::MiningPage(const PlatformStyle* platformStyle, QWidget* parent) : QWidget(parent), - ui(new Ui::MiningPage), - hasMiningprivkey(false) -{ - ui->setupUi(this); - - int nCPUMaxUseThreads = GUIUtil::CPUMaxThreads(); -#ifdef ENABLE_GPU - int nGPUMaxUseThreads = GUIUtil::GPUMaxThreads(); -#endif - std::string PrivAddress = GetArg("-miningprivkey", ""); - if (!PrivAddress.empty()) { - CDynamicSecret Secret; - Secret.SetString(PrivAddress); - if (Secret.IsValid()) { - CDynamicAddress Address; - Address.Set(Secret.GetKey().GetPubKey().GetID()); - ui->labelAddress->setText(QString("All mined coins will go to %1").arg(Address.ToString().c_str())); - hasMiningprivkey = true; - } - } - - if (!dynodeSync.IsSynced() || !dynodeSync.IsBlockchainSynced()) { - ui->sliderCPUCores->setVisible(false); - ui->labelNCPUCores->setText(tr("Slider will show once Dynamic has finished syncing")); - } else { - ui->sliderCPUCores->setVisible(true); - ui->labelNCPUCores->setText(QString("%1").arg(nCPUMaxUseThreads)); - } - - ui->sliderCPUCores->setMinimum(0); - ui->sliderCPUCores->setMaximum(nCPUMaxUseThreads); - ui->sliderCPUCores->setValue(nCPUMaxUseThreads); - -#ifdef ENABLE_GPU - if (!dynodeSync.IsSynced() || !dynodeSync.IsBlockchainSynced()) { - ui->sliderGPUCores->setVisible(false); - ui->labelNGPUCores->setText(tr("Slider will show once Dynamic has finished syncing")); - } else { - ui->sliderGPUCores->setVisible(true); - ui->labelNGPUCores->setText(QString("%1").arg(nGPUMaxUseThreads)); - } - - ui->sliderGPUCores->setMinimum(0); - ui->sliderGPUCores->setMaximum(nGPUMaxUseThreads); - ui->sliderGPUCores->setValue(nGPUMaxUseThreads); - ui->pushSwitchGPUMining->setVisible(true); - ui->checkBoxShowGPUGraph->setVisible(true); -#else - ui->sliderGPUCores->setVisible(false); - ui->labelNGPUCores->setText(tr("GPU mining is not supported in this version of Dynamic")); - ui->pushSwitchGPUMining->setVisible(false); - ui->checkBoxShowGPUGraph->setVisible(false); -#endif - - ui->sliderCPUGraphSampleTime->setMaximum(0); - ui->sliderCPUGraphSampleTime->setMaximum(6); - -#ifdef ENABLE_GPU - ui->sliderGPUGraphSampleTime->setMaximum(0); - ui->sliderGPUGraphSampleTime->setMaximum(6); -#else - ui->sliderGPUGraphSampleTime->setVisible(false); -#endif - -#ifdef ENABLE_GPU - ui->labelGPUGraphSampleSize->setVisible(true); -#else - ui->labelGPUGraphSampleSize->setVisible(false); -#endif - - ui->sliderCPUCores->setToolTip(tr("Use the slider to select the amount of CPU threads to use")); -#ifdef ENABLE_GPU - ui->sliderGPUCores->setToolTip(tr("Use the slider to select the amount of GPU devices to use")); -#endif - ui->labelCPUMinerHashRate->setToolTip(tr("This shows the hashrate of your CPU whilst mining")); -#ifdef ENABLE_GPU - ui->labelGPUMinerHashRate->setToolTip(tr("This shows the hashrate of your GPU whilst mining")); -#endif - ui->labelNetHashRateCPU->setToolTip(tr("This shows the overall hashrate of the Dynamic network")); - ui->labelNetHashRateGPU->setToolTip(tr("This shows the overall hashrate of the Dynamic network")); - ui->labelNextCPUBlock->setToolTip(tr("This shows the average time between the blocks you have mined")); -#ifdef ENABLE_GPU - ui->labelNextGPUBlock->setToolTip(tr("This shows the average time between the blocks you have mined")); -#endif - - connect(ui->sliderCPUCores, SIGNAL(valueChanged(int)), this, SLOT(changeNumberOfCPUThreads(int))); -#ifdef ENABLE_GPU - connect(ui->sliderGPUCores, SIGNAL(valueChanged(int)), this, SLOT(changeNumberOfGPUThreads(int))); -#endif - connect(ui->sliderCPUGraphSampleTime, SIGNAL(valueChanged(int)), this, SLOT(changeCPUSampleTime(int))); -#ifdef ENABLE_GPU - connect(ui->sliderGPUGraphSampleTime, SIGNAL(valueChanged(int)), this, SLOT(changeGPUSampleTime(int))); -#endif - connect(ui->pushSwitchCPUMining, SIGNAL(clicked()), this, SLOT(switchCPUMining())); -#ifdef ENABLE_GPU - connect(ui->pushSwitchGPUMining, SIGNAL(clicked()), this, SLOT(switchGPUMining())); -#endif - connect(ui->pushButtonClearCPUData, SIGNAL(clicked()), this, SLOT(clearCPUHashRateData())); -#ifdef ENABLE_GPU - connect(ui->pushButtonClearGPUData, SIGNAL(clicked()), this, SLOT(clearGPUHashRateData())); -#else - ui->pushButtonClearGPUData->setVisible(false); -#endif - connect(ui->checkBoxShowCPUGraph, SIGNAL(stateChanged(int)), this, SLOT(showCPUHashRate(int))); -#ifdef ENABLE_GPU - connect(ui->checkBoxShowGPUGraph, SIGNAL(stateChanged(int)), this, SLOT(showGPUHashRate(int))); -#endif - - ui->minerCPUHashRateWidget->graphType = HashRateGraphWidget::GraphType::MINER_CPU_HASHRATE; - ui->minerCPUHashRateWidget->UpdateSampleTime(HashRateGraphWidget::SampleTime::FIVE_MINUTES); - -#ifdef ENABLE_GPU - ui->minerGPUHashRateWidget->graphType = HashRateGraphWidget::GraphType::MINER_GPU_HASHRATE; - ui->minerGPUHashRateWidget->UpdateSampleTime(HashRateGraphWidget::SampleTime::FIVE_MINUTES); -#endif - - showCPUHashMeterControls(false); -#ifdef ENABLE_GPU - showGPUHashMeterControls(false); -#endif - fCPUMinerOn = false; - fGPUMinerOn = false; - updateUI(); - startTimer(3511); -} - -MiningPage::~MiningPage() -{ - delete ui; -} - -void MiningPage::setModel(WalletModel* model) -{ - this->model = model; -} - -void MiningPage::updateUI() -{ - if (dynodeSync.IsSynced() && dynodeSync.IsBlockchainSynced()) { -#ifdef ENABLE_GPU - if (ui->sliderGPUCores->isHidden()) { - int nThreads = ui->sliderGPUCores->value(); - ui->sliderGPUCores->setVisible(true); - ui->labelNGPUCores->setText(QString("%1").arg(nThreads)); - } -#endif - if (ui->sliderCPUCores->isHidden()) { - int nThreads = ui->sliderCPUCores->value(); - ui->sliderCPUCores->setVisible(true); - ui->labelNCPUCores->setText(QString("%1").arg(nThreads)); - } - } - qint64 networkHashrate = GUIUtil::GetNetworkHashPS(120, -1); - qint64 hashrate = GetHashRate(); - - ui->labelNetHashRateCPU->setText(GUIUtil::FormatHashRate(networkHashrate)); - ui->labelNetHashRateGPU->setText(GUIUtil::FormatHashRate(networkHashrate)); - ui->labelCPUMinerHashRate->setText(GUIUtil::FormatHashRate(GetCPUHashRate())); -#ifdef ENABLE_GPU - ui->labelGPUMinerHashRate->setText(GUIUtil::FormatHashRate(GetGPUHashRate())); -#endif - - QString nextBlockTime; - if (hashrate == 0) { - nextBlockTime = QChar(L'∞'); - } else { - arith_uint256 target; - target.SetCompact(chainActive.Tip()->nBits); - arith_uint256 expectedTime = (arith_uint256(1) << 256) / (target * hashrate); - nextBlockTime = GUIUtil::FormatTimeInterval(expectedTime); - } - - ui->labelNextCPUBlock->setText(nextBlockTime); -#ifdef ENABLE_GPU - ui->labelNextGPUBlock->setText(nextBlockTime); - updateGPUPushSwitch(); -#endif - updateCPUPushSwitch(); -} - -void MiningPage::updatePushSwitch(QPushButton* pushSwitch, bool minerOn) -{ - if (!dynodeSync.IsSynced() || !dynodeSync.IsBlockchainSynced()) { - pushSwitch->setToolTip(tr("Blockchain/Dynodes are not synced, please wait until fully synced before mining!")); - pushSwitch->setText(tr("Disabled")); - pushSwitch->setEnabled(false); - return; - } - if (minerOn) { - pushSwitch->setToolTip(tr("Click 'Stop mining' to stop mining!")); - pushSwitch->setText(tr("Stop mining")); - } else if (!minerOn) { - pushSwitch->setToolTip(tr("Click 'Start mining' to begin mining!")); - pushSwitch->setText(tr("Start mining")); - } - pushSwitch->setEnabled(true); -} - -void MiningPage::updateCPUPushSwitch() -{ - updatePushSwitch(ui->pushSwitchCPUMining, fCPUMinerOn); -} - -#ifdef ENABLE_GPU -void MiningPage::updateGPUPushSwitch() -{ - updatePushSwitch(ui->pushSwitchGPUMining, fGPUMinerOn); -} -#endif - -void MiningPage::StartCPUMiner() -{ - LogPrintf("StartCPUMiner %d (%s)\n", ui->sliderCPUCores->value(), fCPUMinerOn); - fCPUMinerOn = true; - InitMiners(Params(), *g_connman); - changeNumberOfCPUThreads(ui->sliderCPUCores->value()); - updateUI(); -} - -#ifdef ENABLE_GPU -void MiningPage::StartGPUMiner() -{ - fGPUMinerOn = true; - InitMiners(Params(), *g_connman); - changeNumberOfGPUThreads(ui->sliderGPUCores->value()); - updateUI(); -} -#endif - -void MiningPage::StopCPUMiner() -{ - LogPrintf("StopCPUMiner %d (%s)\n", ui->sliderCPUCores->value(), fCPUMinerOn); - fCPUMinerOn = false; - changeNumberOfCPUThreads(0, true); - ShutdownCPUMiners(); - updateUI(); -} - -#ifdef ENABLE_GPU -void MiningPage::StopGPUMiner() -{ - fGPUMinerOn = false; - changeNumberOfGPUThreads(0, true); - ShutdownGPUMiners(); - updateUI(); -} -#endif - -bool MiningPage::isMinerOn() -{ -#ifdef ENABLE_GPU - return fCPUMinerOn || fGPUMinerOn; -#else - return fCPUMinerOn; -#endif -} - -void MiningPage::changeNumberOfCPUThreads(int i, bool shutdown) -{ - if (!shutdown) - ui->labelNCPUCores->setText(QString("%1").arg(i)); - ForceSetArg("-gen", isMinerOn() ? "1" : "0"); - ForceSetArg("-genproclimit-cpu", isMinerOn() ? i : 0); - InitMiners(Params(), *g_connman); - SetCPUMinerThreads(i); - if (fCPUMinerOn) - StartMiners(); -} - -#ifdef ENABLE_GPU -void MiningPage::changeNumberOfGPUThreads(int i, bool shutdown) -{ - if (!shutdown) - ui->labelNGPUCores->setText(QString("%1").arg(i)); - ForceSetArg("-gen", isMinerOn() ? "1" : "0"); - ForceSetArg("-genproclimit-gpu", isMinerOn() ? i : 0); - InitMiners(Params(), *g_connman); - SetGPUMinerThreads(i); - if (fGPUMinerOn) - StartMiners(); -} -#endif - -void MiningPage::switchCPUMining() -{ - //Check to see if wallet needs upgrading - if (model->getWallet()->WalletNeedsUpgrading()) { - QMessageBox::critical(this, QObject::tr("Older wallet version detected"), - QObject::tr("Your wallet has not been fully upgraded to version 2.4. Please unlock your wallet to continue.")); - return; - } - - fCPUMinerOn = !fCPUMinerOn; - updateCPUPushSwitch(); - if (fCPUMinerOn) { - StartCPUMiner(); - } else { - StopCPUMiner(); - } -} - -#ifdef ENABLE_GPU -void MiningPage::switchGPUMining() -{ - fGPUMinerOn = !fGPUMinerOn; - updateGPUPushSwitch(); - if (fGPUMinerOn) { - StartGPUMiner(); - } else { - StopGPUMiner(); - } -} -#endif - -void MiningPage::timerEvent(QTimerEvent*) -{ - updateUI(); -} - -void MiningPage::showCPUHashRate(int i) -{ - if (i == 0) { - ui->minerCPUHashRateWidget->StopHashMeter(); - showCPUHashMeterControls(false); - } else { - ui->minerCPUHashRateWidget->StartHashMeter(); - showCPUHashMeterControls(true); - } -} - -#ifdef ENABLE_GPU -void MiningPage::showGPUHashRate(int i) -{ - if (i == 0) { - ui->minerGPUHashRateWidget->StopHashMeter(); - showGPUHashMeterControls(false); - } else { - ui->minerGPUHashRateWidget->StartHashMeter(); - showGPUHashMeterControls(true); - } -} -#endif - -void MiningPage::showCPUHashMeterControls(bool show) -{ - ui->sliderCPUGraphSampleTime->setVisible(show); - ui->labelCPUGraphSampleSize->setVisible(show); - ui->pushButtonClearCPUData->setVisible(show); -} - -#ifdef ENABLE_GPU -void MiningPage::showGPUHashMeterControls(bool show) -{ - ui->sliderGPUGraphSampleTime->setVisible(show); - ui->labelGPUGraphSampleSize->setVisible(show); - ui->pushButtonClearGPUData->setVisible(show); -} -#endif - -void MiningPage::clearCPUHashRateData() -{ - ui->minerCPUHashRateWidget->clear(); -} - -#ifdef ENABLE_GPU -void MiningPage::clearGPUHashRateData() -{ - ui->minerGPUHashRateWidget->clear(); -} -#endif - -void setSampleTimeLabel(QLabel* labelSize, HashRateGraphWidget* hashRate, int i) -{ - switch (i) { - case 0: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::FIVE_MINUTES); - labelSize->setText(QString("5 minutes")); - break; - case 1: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::TEN_MINUTES); - labelSize->setText(QString("10 minutes")); - break; - case 2: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::THIRTY_MINUTES); - labelSize->setText(QString("30 minutes")); - break; - case 3: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::ONE_HOUR); - labelSize->setText(QString("1 hour")); - break; - case 4: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::EIGHT_HOURS); - labelSize->setText(QString("8 hours")); - break; - case 5: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::TWELVE_HOURS); - labelSize->setText(QString("12 hours")); - break; - case 6: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::ONE_DAY); - labelSize->setText(QString("1 day")); - break; - default: - hashRate->UpdateSampleTime(HashRateGraphWidget::SampleTime::ONE_DAY); - labelSize->setText(QString("1 day")); - break; - } -} - -void MiningPage::changeCPUSampleTime(int i) -{ - QLabel* labelSize = ui->labelCPUGraphSampleSize; - HashRateGraphWidget* hashRate = ui->minerCPUHashRateWidget; - setSampleTimeLabel(labelSize, hashRate, i); -} - -#ifdef ENABLE_GPU -void MiningPage::changeGPUSampleTime(int i) -{ - QLabel* labelSize = ui->labelGPUGraphSampleSize; - HashRateGraphWidget* hashRate = ui->minerGPUHashRateWidget; - setSampleTimeLabel(labelSize, hashRate, i); -} -#endif diff --git a/src/qt/miningpage.h b/src/qt/miningpage.h deleted file mode 100644 index 7d98d64d9e..0000000000 --- a/src/qt/miningpage.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (c) 2016-2021 Duality Blockchain Solutions Developers -// Copyright (c) 2014-2021 The Dash Core Developers -// Copyright (c) 2009-2021 The Bitcoin Developers -// Copyright (c) 2009-2021 Satoshi Nakamoto -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef MININGPAGE_H -#define MININGPAGE_H - -#include "platformstyle.h" - -#include "walletmodel.h" - -#include -#include - -#include - -namespace Ui -{ -class MiningPage; -} - -class MiningPage : public QWidget -{ - Q_OBJECT - -public: - explicit MiningPage(const PlatformStyle* platformStyle, QWidget* parent = 0); - ~MiningPage(); - - void setModel(WalletModel* model); - -private: - Ui::MiningPage* ui; - WalletModel* model; - std::unique_ptr unlockContext; - bool hasMiningprivkey; - bool fGPUMinerOn; - bool fCPUMinerOn; - void timerEvent(QTimerEvent* event); - void updateUI(); - void StartCPUMiner(); - void StopCPUMiner(); - void showCPUHashMeterControls(bool show); - void updateCPUPushSwitch(); -#ifdef ENABLE_GPU - void StartGPUMiner(); - void StopGPUMiner(); - void showGPUHashMeterControls(bool show); - void updateGPUPushSwitch(); -#endif - - void updatePushSwitch(QPushButton* pushSwitch, bool minerOn); - - bool isMinerOn(); - -private Q_SLOTS: - - void changeNumberOfCPUThreads(int i, bool shutdown = false); - void switchCPUMining(); - void showCPUHashRate(int i); - void changeCPUSampleTime(int i); - void clearCPUHashRateData(); - - -#ifdef ENABLE_GPU - void changeNumberOfGPUThreads(int i, bool shutdown = false); - void switchGPUMining(); - void showGPUHashRate(int i); - void changeGPUSampleTime(int i); - void clearGPUHashRateData(); -#endif -}; - -#endif // MININGPAGE_H