diff --git a/CMakeLists.txt b/CMakeLists.txt index ccd8337c..81afe249 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,7 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) ############################# option(BUILD_HIPIFY_CLANG "Enable building the CUDA->HIP converter" OFF) option(__HIP_ENABLE_PCH "Enable/Disable pre-compiled hip headers" ON) -option(HIP_OFFICIAL_BUILD "Enable/Disable for mainline/staging builds" OFF) +option(HIP_OFFICIAL_BUILD "Enable/Disable for mainline/staging builds" ON) option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) set(HIPCC_BIN_DIR "" CACHE STRING "HIPCC and HIPCONFIG binary directories") diff --git a/include/hip/amd_detail/amd_device_functions.h b/include/hip/amd_detail/amd_device_functions.h index ce421c63..1a4900db 100644 --- a/include/hip/amd_detail/amd_device_functions.h +++ b/include/hip/amd_detail/amd_device_functions.h @@ -922,7 +922,7 @@ int __syncthreads_or(int predicate) PIPE_ID 7:6 Pipeline from which the wave was dispatched. CU_ID 11:8 Compute Unit the wave is assigned to. SH_ID 12 Shader Array (within an SE) the wave is assigned to. - SE_ID 14:13 Shader Engine the wave is assigned to. + SE_ID 15:13 Shader Engine the wave is assigned to. TG_ID 19:16 Thread-group ID VM_ID 23:20 Virtual Memory ID QUEUE_ID 26:24 Queue from which this wave was dispatched. @@ -935,7 +935,7 @@ int __syncthreads_or(int predicate) #define HW_ID_CU_ID_SIZE 4 #define HW_ID_CU_ID_OFFSET 8 -#define HW_ID_SE_ID_SIZE 2 +#define HW_ID_SE_ID_SIZE 3 #define HW_ID_SE_ID_OFFSET 13 /* diff --git a/include/hip/amd_detail/amd_hip_bfloat16.h b/include/hip/amd_detail/amd_hip_bfloat16.h index 8c7f7839..deb3bfb7 100644 --- a/include/hip/amd_detail/amd_hip_bfloat16.h +++ b/include/hip/amd_detail/amd_hip_bfloat16.h @@ -32,10 +32,8 @@ #include "host_defines.h" #if defined(__HIPCC_RTC__) #define __HOST_DEVICE__ __device__ - #define HIP_OSTREAM __hip_internal::ostream #else #define __HOST_DEVICE__ __host__ __device__ - #define HIP_OSTREAM std::ostream #endif #if __cplusplus < 201103L || !defined(__HIPCC__) @@ -181,12 +179,12 @@ static_assert(__hip_internal::is_trivial{}, static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public) && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data), "internal hip_bfloat16 does not match public hip_bfloat16"); -#endif -inline HIP_OSTREAM& operator<<(HIP_OSTREAM& os, const hip_bfloat16& bf16) +inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16) { - return os << bf16; + return os << float(bf16); } +#endif inline __HOST_DEVICE__ hip_bfloat16 operator+(hip_bfloat16 a) { diff --git a/include/hip/nvidia_detail/nvidia_hip_gl_interop.h b/include/hip/nvidia_detail/nvidia_hip_gl_interop.h new file mode 100644 index 00000000..ce2b2f3e --- /dev/null +++ b/include/hip/nvidia_detail/nvidia_hip_gl_interop.h @@ -0,0 +1,44 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef HIP_INCLUDE_NVIDIA_HIP_GL_INTEROP_H +#define HIP_INCLUDE_NVIDIA_HIP_GL_INTEROP_H + +#include + +typedef enum cudaGLDeviceList hipGLDeviceList; +#define hipGLDeviceListAll cudaGLDeviceListAll +#define hipGLDeviceListCurrentFrame cudaGLDeviceListCurrentFrame +#define hipGLDeviceListNextFrame cudaGLDeviceListNextFrame + +inline static hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, + hipGLDeviceList deviceList) { + return hipCUDAErrorTohipError(cudaGLGetDevices(pHipDeviceCount, pHipDevices, hipDeviceCount, deviceList)); +} + +inline static hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer, unsigned int flags) { + return hipCUDAErrorTohipError(cudaGraphicsGLRegisterBuffer(resource, buffer, flags)); +} + +inline static hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) { + return hipCUDAErrorTohipError(cudaGraphicsGLRegisterImage(resource, image, target, flags)); +} +#endif diff --git a/include/hip/nvidia_detail/nvidia_hip_runtime_api.h b/include/hip/nvidia_detail/nvidia_hip_runtime_api.h index 0c492b7c..f2ffc7f0 100644 --- a/include/hip/nvidia_detail/nvidia_hip_runtime_api.h +++ b/include/hip/nvidia_detail/nvidia_hip_runtime_api.h @@ -27,7 +27,6 @@ THE SOFTWARE. #include #include #include -#include #include @@ -39,6 +38,7 @@ THE SOFTWARE. #define CUDA_11030 11030 #define CUDA_11040 11040 #define CUDA_11060 11060 +#define CUDA_12000 12000 #ifdef __cplusplus extern "C" { @@ -1302,11 +1302,6 @@ typedef cudaExternalSemaphore_t hipExternalSemaphore_t; typedef struct cudaExternalSemaphoreSignalParams hipExternalSemaphoreSignalParams; typedef struct cudaExternalSemaphoreWaitParams hipExternalSemaphoreWaitParams; -typedef enum cudaGLDeviceList hipGLDeviceList; -#define hipGLDeviceListAll cudaGLDeviceListAll -#define hipGLDeviceListCurrentFrame cudaGLDeviceListCurrentFrame -#define hipGLDeviceListNextFrame cudaGLDeviceListNextFrame - typedef struct cudaGraphicsResource hipGraphicsResource; typedef cudaGraphicsResource_t hipGraphicsResource_t; @@ -2773,6 +2768,7 @@ inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig)); } +#if CUDA_VERSION < CUDA_12000 __HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset, struct textureReference* tex, const void* devPtr, @@ -2786,6 +2782,8 @@ __HIP_DEPRECATED inline static hipError_t hipBindTexture2D( const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) { return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch)); } +#endif // CUDA_VERSION < CUDA_12000 + inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { @@ -2818,10 +2816,12 @@ inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDe return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject)); } +#if CUDA_VERSION < CUDA_12000 __HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset( size_t* offset, const struct textureReference* texref) { return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref)); } +#endif inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { @@ -2890,19 +2890,6 @@ inline static hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem) { return hipCUDAErrorTohipError(cudaDestroyExternalMemory(extMem)); } -inline static hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, - hipGLDeviceList deviceList) { - return hipCUDAErrorTohipError(cudaGLGetDevices(pHipDeviceCount, pHipDevices, hipDeviceCount, deviceList)); -} - -inline static hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer, unsigned int flags) { - return hipCUDAErrorTohipError(cudaGraphicsGLRegisterBuffer(resource, buffer, flags)); -} - -inline static hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) { - return hipCUDAErrorTohipError(cudaGraphicsGLRegisterImage(resource, image, target, flags)); -} - inline static hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cudaGraphicsMapResources(count, resources, stream)); } @@ -3067,6 +3054,7 @@ inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( blockSize, dynamicSMemSize, flags)); } +#if CUDA_VERSION < CUDA_12000 template inline static hipError_t hipBindTexture(size_t* offset, const struct texture& tex, const void* devPtr, size_t size = UINT_MAX) { @@ -3109,6 +3097,7 @@ __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray( struct texture& tex, hipArray_const_t array) { return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array)); } +#endif // CUDA_VERSION < CUDA_12000 template inline static hipChannelFormatDesc hipCreateChannelDesc() { @@ -3470,8 +3459,10 @@ inline static hipError_t hipStreamGetCaptureInfo_v2( hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, unsigned long long* id_out __dparm(0), hipGraph_t* graph_out __dparm(0), const hipGraphNode_t** dependencies_out __dparm(0), size_t* numDependencies_out __dparm(0)) { - return hipCUDAErrorTohipError(cudaStreamGetCaptureInfo_v2( - stream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out)); + return hipCUResultTohipError(cuStreamGetCaptureInfo_v2( + stream, reinterpret_cast(captureStatus_out), + reinterpret_cast(id_out), graph_out, + dependencies_out, numDependencies_out)); } #endif diff --git a/src/hip_context.cpp b/src/hip_context.cpp index 824e6959..9f6fd930 100644 --- a/src/hip_context.cpp +++ b/src/hip_context.cpp @@ -26,7 +26,7 @@ #include "utils/versions.hpp" std::vector g_devices; - +amd::Monitor g_hipInitlock{"hipInit lock"}; namespace hip { thread_local TlsAggregator tls; Device* host_device = nullptr; diff --git a/src/hip_device.cpp b/src/hip_device.cpp index fedd62e5..a3d059ac 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -107,13 +107,16 @@ void Device::RemoveStreamFromPools(Stream* stream) { // ================================================================================================ void Device::Reset() { - auto it = mem_pools_.begin(); - while (it != mem_pools_.end()) { - auto current = it++; - (*current)->ReleaseAllMemory(); - delete *current; + { + amd::ScopedLock lock(lock_); + auto it = mem_pools_.begin(); + while (it != mem_pools_.end()) { + auto current = it++; + (*current)->ReleaseAllMemory(); + delete *current; + } + mem_pools_.clear(); } - mem_pools_.clear(); flags_ = hipDeviceScheduleSpin; hip::Stream::destroyAllStreams(deviceId_); amd::MemObjMap::Purge(devices()[0]); diff --git a/src/hip_device_runtime.cpp b/src/hip_device_runtime.cpp index def9bcb0..1a73ecd8 100644 --- a/src/hip_device_runtime.cpp +++ b/src/hip_device_runtime.cpp @@ -474,7 +474,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { // No way to set cache config yet. - HIP_RETURN(hipErrorNotSupported); + HIP_RETURN(hipSuccess); } hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { @@ -506,7 +506,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { // No way to set cache config yet. - HIP_RETURN(hipErrorNotSupported); + HIP_RETURN(hipSuccess); } hipError_t hipDeviceSynchronize ( void ) { @@ -518,7 +518,8 @@ hipError_t hipDeviceSynchronize ( void ) { HIP_RETURN(hipErrorOutOfMemory); } - if (hip::Stream::StreamCaptureOngoing() == true) { + if (hip::Stream::StreamCaptureOngoing(reinterpret_cast( + hip::getCurrentDevice()->GetNullStream())) == true) { HIP_RETURN(hipErrorStreamCaptureUnsupported); } diff --git a/src/hip_event.cpp b/src/hip_event.cpp index f556cabe..cf301572 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -30,21 +30,21 @@ namespace hip { static amd::Monitor eventSetLock{"Guards global event set"}; static std::unordered_set eventSet; -bool Event::ready() { +bool Event::ready(eventType type) { if (event_->status() != CL_COMPLETE) { event_->notifyCmdQueue(); } // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status - bool ready = g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_); + bool ready = CheckHwEvent(type); if (!ready) { ready = (event_->status() == CL_COMPLETE); } return ready; } -bool EventDD::ready() { +bool EventDD::ready(eventType type) { // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status - bool ready = g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_); + bool ready = CheckHwEvent(type); // FIXME: Remove status check entirely if (!ready) { ready = (event_->status() == CL_COMPLETE); @@ -60,7 +60,7 @@ hipError_t Event::query() { return hipSuccess; } - return ready() ? hipSuccess : hipErrorNotReady; + return ready(Query) ? hipSuccess : hipErrorNotReady; } hipError_t Event::synchronize() { @@ -108,7 +108,7 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { return hipErrorInvalidHandle; } - if (!ready()) { + if (!ready(ElapsedTime)) { return hipErrorNotReady; } @@ -124,7 +124,7 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { return hipErrorInvalidHandle; } - if (!ready() || !eStop.ready()) { + if (!ready(ElapsedTime) || !eStop.ready(ElapsedTime)) { return hipErrorNotReady; } @@ -199,7 +199,7 @@ hipError_t Event::streamWait(hipStream_t stream, uint flags) { amd::HostQueue* queue = hip::getQueue(stream); // Access to event_ object must be lock protected amd::ScopedLock lock(lock_); - if ((event_ == nullptr) || (event_->command().queue() == queue) || ready()) { + if ((event_ == nullptr) || (event_->command().queue() == queue) || ready(StreamWait)) { return hipSuccess; } if (!event_->notifyCmdQueue()) { @@ -404,9 +404,6 @@ hipError_t hipEventSynchronize(hipEvent_t event) { HIP_RETURN(hipErrorInvalidHandle); } - if (hip::Stream::StreamCaptureOngoing() == true) { - HIP_RETURN(hipErrorStreamCaptureUnsupported); - } hip::Event* e = reinterpret_cast(event); HIP_RETURN(e->synchronize()); } diff --git a/src/hip_event.hpp b/src/hip_event.hpp index e08ea33f..b3cd9d53 100644 --- a/src/hip_event.hpp +++ b/src/hip_event.hpp @@ -89,6 +89,7 @@ class EventMarker : public amd::Marker { } }; +enum eventType { Query, StreamWait, ElapsedTime }; class Event { /// event recorded on stream where capture is active bool onCapture_; @@ -96,6 +97,16 @@ class Event { hipStream_t captureStream_ = nullptr; /// Previous captured nodes before event record std::vector nodesPrevToRecorded_; + protected: + bool CheckHwEvent(eventType type) { + bool ready; + if (type == Query) { + ready = g_devices[deviceId()]->devices()[0]->IsHwEventReadyForcedWait(*event_); + } else { + ready = g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_); + } + return ready; + } public: Event(unsigned int flags) : flags(flags), lock_("hipEvent_t", true), @@ -170,7 +181,7 @@ class Event { return hipErrorInvalidConfiguration; } virtual bool awaitEventCompletion(); - virtual bool ready(); + virtual bool ready(eventType type); virtual int64_t time(bool getStartTs) const; protected: @@ -190,7 +201,7 @@ class EventDD : public Event { virtual ~EventDD() {} virtual bool awaitEventCompletion(); - virtual bool ready(); + virtual bool ready(eventType type); virtual int64_t time(bool getStartTs) const; }; diff --git a/src/hip_graph.cpp b/src/hip_graph.cpp index 4b5d9e15..5d11dab0 100644 --- a/src/hip_graph.cpp +++ b/src/hip_graph.cpp @@ -29,7 +29,7 @@ std::vector g_captureStreams; amd::Monitor g_captureStreamsLock{"StreamCaptureGlobalList"}; -static amd::Monitor g_streamSetLock{"StreamCaptureset"}; +amd::Monitor g_streamSetLock{"StreamCaptureset"}; std::unordered_set g_allCapturingStreams; inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph, diff --git a/src/hip_internal.hpp b/src/hip_internal.hpp index 99bdf00d..b0b8f0c4 100644 --- a/src/hip_internal.hpp +++ b/src/hip_internal.hpp @@ -71,7 +71,7 @@ typedef struct ihipIpcEventHandle_st { const char* ihipGetErrorName(hipError_t hip_error); -static amd::Monitor g_hipInitlock{"hipInit lock"}; +extern amd::Monitor g_hipInitlock; #define HIP_INIT(noReturn) {\ amd::ScopedLock lock(g_hipInitlock); \ if (!amd::Runtime::initialized()) { \ @@ -299,7 +299,7 @@ namespace hip { static void destroyAllStreams(int deviceId); /// Check Stream Capture status to make sure it is done - static bool StreamCaptureOngoing(void); + static bool StreamCaptureOngoing(hipStream_t hStream); /// Returns capture status of the current stream hipStreamCaptureStatus GetCaptureStatus() const { return captureStatus_; } @@ -382,7 +382,7 @@ namespace hip { /// HIP Device class class Device { - amd::Monitor lock_{"Device lock"}; + amd::Monitor lock_{"Device lock", true}; /// ROCclr context amd::Context* context_; /// Device's ID @@ -569,5 +569,6 @@ constexpr bool kMarkerDisableFlush = true; //!< Avoids command batch flush in extern std::vector g_captureStreams; extern amd::Monitor g_captureStreamsLock; +extern amd::Monitor g_streamSetLock; extern std::unordered_set g_allCapturingStreams; #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index f3fcf68f..46316ed2 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -702,7 +702,8 @@ hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDevice // Since we are using SVM buffer DevicePtr and HostPtr is the same void* ptr = dptr; - amd::Memory* svmMem = getMemoryObjectWithOffset(ptr); + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); if (svmMem == nullptr) { HIP_RETURN(hipErrorNotFound); } diff --git a/src/hip_module.cpp b/src/hip_module.cpp index f3ae2611..345f65bc 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -368,6 +368,12 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, if (status != hipSuccess) { return status; } + + // Make sure the app doesn't launch a workgroup bigger than the global size + if (globalWorkSizeX < blockDimX) blockDimX = globalWorkSizeX; + if (globalWorkSizeY < blockDimY) blockDimY = globalWorkSizeY; + if (globalWorkSizeZ < blockDimZ) blockDimZ = globalWorkSizeZ; + amd::Command* command = nullptr; amd::HostQueue* queue = hip::getQueue(hStream); status = ihipLaunchKernelCommand(command, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, @@ -516,7 +522,8 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* return hipErrorInvalidValue; } - if ((flags < 0) || (flags > hipCooperativeLaunchMultiDeviceNoPostSync)) { + if (flags > (hipCooperativeLaunchMultiDeviceNoPostSync + + hipCooperativeLaunchMultiDeviceNoPreSync)) { return hipErrorInvalidValue; } @@ -730,7 +737,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL return hipErrorInvalidValue; } - amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); + amd::HostQueue* queue = hip::getQueue(launch.stream); hipFunction_t func = nullptr; // The order of devices in the launch may not match the order in the global array for (size_t dev = 0; dev < g_devices.size(); ++dev) { diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 3d1e9168..f20f3998 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -211,8 +211,18 @@ void Stream::destroyAllStreams(int deviceId) { } } -bool Stream::StreamCaptureOngoing(void) { - return (g_allCapturingStreams.empty() == true) ? false : true; +bool Stream::StreamCaptureOngoing(hipStream_t hStream) { + hip::Stream* s = reinterpret_cast(hStream); + // If any local thread has an ongoing or concurrent capture sequence initiated + // with hipStreamCaptureModeGlobal, it is prohibited from unsafe calls + if (s != nullptr && s->GetCaptureMode() == hipStreamCaptureModeGlobal) { + amd::ScopedLock lock(g_captureStreamsLock); + return (g_captureStreams.empty() == true) ? false : true; + } + else { + amd::ScopedLock lock(g_streamSetLock); + return (g_allCapturingStreams.find(s) == g_allCapturingStreams.end() ? false : true); + } } };// hip namespace @@ -448,7 +458,7 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) { } if (stream != nullptr) { // If still capturing return error - if (hip::Stream::StreamCaptureOngoing() == true) { + if (hip::Stream::StreamCaptureOngoing(stream) == true) { HIP_RETURN(hipErrorStreamCaptureUnsupported); } } @@ -534,12 +544,6 @@ hipError_t hipStreamWaitEvent_common(hipStream_t stream, hipEvent_t event, unsig return hipErrorContextIsDestroyed; } - if (stream != nullptr) { - // If still capturing return error - if (hip::Stream::StreamCaptureOngoing() == true) { - HIP_RETURN(hipErrorStreamCaptureIsolation); - } - } hip::Event* e = reinterpret_cast(event); return e->streamWait(stream, flags); } @@ -564,7 +568,7 @@ hipError_t hipStreamQuery_common(hipStream_t stream) { } if (stream != nullptr) { // If still capturing return error - if (hip::Stream::StreamCaptureOngoing() == true) { + if (hip::Stream::StreamCaptureOngoing(stream) == true) { HIP_RETURN(hipErrorStreamCaptureUnsupported); } } @@ -695,7 +699,7 @@ hipError_t hipLaunchHostFunc_spt(hipStream_t stream, hipHostFn_t fn, void* userD // ================================================================================================ hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData) { HIP_INIT_API(hipLaunchHostFunc, stream, fn, userData); - if (stream == nullptr) { + if (stream == nullptr && (hip::Stream::StreamCaptureOngoing(stream) == true)) { HIP_RETURN(hipErrorStreamCaptureImplicit); } HIP_RETURN(hipLaunchHostFunc_common(stream, fn, userData));