131 commits
513f861
1. Disable third_party_install so flow doesn't automatically build AP…
jithunnair-amd May 5, 2020
ed421e9
Update setup.py to hipify before building extension
jithunnair-amd May 6, 2020
e82fa34
Cooperative groups is not supported by HIP yet, so replace with worka…
jithunnair-amd May 6, 2020
bdb8421
Use ROCm APEX
jithunnair-amd May 8, 2020
7c0e6ac
Update ROCm APEX commit to get FusedLayerNorm and hipification fixes
jithunnair-amd May 8, 2020
23200d4
Update requirements to use tensorflow-rocm package instead of tensorf…
jithunnair-amd May 8, 2020
c10bdcb
Use DeepSpeedExamples fork
jithunnair-amd May 8, 2020
43212b3
Typo
jithunnair-amd May 8, 2020
54ad8a5
Use changes_for_rocm_build branch for jithunnair-amd fork of DeepSpee…
jithunnair-amd May 16, 2020
453d501
Update ROCm APEX commit
jithunnair-amd May 28, 2020
4454bc2
Update DeepSpeedExamples commit
jithunnair-amd May 29, 2020
db28f75
Update ROCm Apex commit
jithunnair-amd Jun 25, 2020
30f661e
Merge branch upstream into master
jithunnair-amd Sep 15, 2020
077638d
Enable cooperative groups for ROCm
jithunnair-amd Sep 15, 2020
66c135e
Update setup.py to build lamb extension for ROCm
jithunnair-amd Sep 15, 2020
9379918
Do not install torch and torchvision for ROCm using pip
jithunnair-amd Sep 15, 2020
b5866a6
Use ROCm fork of DeepSpeedExamples
jithunnair-amd Sep 16, 2020
9c624c2
Update DeepSpeedExamples commit to use ROCm fork master branch
jithunnair-amd Sep 16, 2020
ab6aca1
Update DeepSpeedExamples commit
jithunnair-amd Sep 26, 2020
884f08e
ROCm PyTorch can be installed in the user local area in some cases
jithunnair-amd Sep 26, 2020
17febe5
Remove requirements.txt since upstream moved it to requirements folder
jithunnair-amd Sep 29, 2020
46d64e2
Add Dockerfile for ROCm
jithunnair-amd Sep 30, 2020
c2d4cc0
Add skips for unit tests that fail on ROCm. Current status: 72 passed…
jithunnair-amd Sep 30, 2020
9f0c80d
Enable CPU adam extension for ROCm
jithunnair-amd Oct 19, 2020
cb3f83a
Install requirements as appropriate for ROCm
jithunnair-amd Oct 27, 2020
617027f
Skip additional unit tests that fail on CI (but not locally)
jithunnair-amd Oct 28, 2020
a508e62
Do not skip unit tests which pass with latest PyTorch
jithunnair-amd Nov 3, 2020
3dd5e2d
Modify include files to build CPU Adam extension
jithunnair-amd Nov 3, 2020
77cd5c3
Update setup.py for latest hipify
jithunnair-amd Dec 16, 2020
7f9bbeb
Update CPU Adam header files to remove ifdefing unnecessary with late…
jithunnair-amd Dec 16, 2020
ea71005
Hipified transformer kernel extensions
jithunnair-amd Dec 23, 2020
fbddd93
Cooperative Groups workaround for transformer kernels extension
jithunnair-amd Dec 23, 2020
9091b20
Update apex commit
jithunnair-amd Jan 7, 2021
3edda06
Merge from upstream; resolve conflicts; checkout 'theirs' for tests/u…
jithunnair-amd Mar 26, 2021
5e6bb85
Integrate op_builder from upstream and update for ROCm
jithunnair-amd Mar 26, 2021
67ed124
Update Dockerfile.rocm
jithunnair-amd Mar 27, 2021
c4fe427
Temporary hacks to workaround: 1) setup.py issues on ROCm wrt. absolu…
jithunnair-amd Mar 27, 2021
74ebc97
torch.version.cuda doesn't exist for ROCm PyTorch
jithunnair-amd Mar 27, 2021
1bb74d0
Add hip_version
jithunnair-amd Mar 29, 2021
3d4e19d
Check hip version for ROCm builds
jithunnair-amd Mar 29, 2021
9939bd7
Remove unused dir
jithunnair-amd Mar 31, 2021
99571e5
Skipped the tests with the error,
rraminen Apr 8, 2021
9d8ad53
Updated Dockerfile.rocm
rraminen Apr 9, 2021
e323eab
Merge pull request #5 from ROCmSoftwarePlatform/Dockerfile.rocm_PR
jithunnair-amd Apr 10, 2021
529ebcd
Update skipIfRocm to add customizable reason string (#6)
jithunnair-amd Apr 12, 2021
37651f3
Disable AVX512 for ROCm to enable same build of DeepSpeed to work on …
jithunnair-amd Apr 13, 2021
7be71d3
Update headers and include_dirs to enable transformer extension (#8)
jithunnair-amd Apr 19, 2021
1c69737
Add patched CG headers to rocm install path (#9)
jithunnair-amd Apr 19, 2021
ac4f8d5
Update DeepSpeedExamples commit (#10)
jithunnair-amd Apr 19, 2021
14204ab
Update DeepSpeedExamples commit
jithunnair-amd Apr 21, 2021
827ebfb
Update DeepSpeedExamples commit
jithunnair-amd Apr 22, 2021
2f77a87
v0.3.15 IFU
rraminen Apr 28, 2021
0d06e02
Merge pull request #12 from rraminen/IFU
jeffdaily Apr 28, 2021
3f2657f
Add Github Actions ifu.yml
jithunnair-amd May 11, 2021
9b41aa7
Update ifu.yml to ignore DeepSpeedExamples
jithunnair-amd May 12, 2021
497f5a1
Merge remote-tracking branch 'upstream/master' into IFU-master-2021-0…
jithunnair-amd May 12, 2021
b1563d6
Merge pull request #13 from ROCmSoftwarePlatform/IFU-master-2021-05-12
jithunnair-amd May 12, 2021
2066405
Update DeepSpeedExamples commit
jithunnair-amd May 12, 2021
0a87051
Merge remote-tracking branch 'upstream/master'
invalid-email-address May 17, 2021
e827515
Use branch name in PR title/branch name
jithunnair-amd May 17, 2021
ae10359
Merge pull request #14 from ROCmSoftwarePlatform/IFU-master-2021-05-17
jithunnair-amd May 17, 2021
4c7a252
Add email functionality
jithunnair-amd May 21, 2021
7b900de
IFU-master-2021-05-27
rraminen May 28, 2021
5de081e
Pointed DeepSpeedExamples to latest commit after IFU
rraminen Jun 4, 2021
1850f88
Merge pull request #17 from rraminen/IFU_5_27
jithunnair-amd Jun 4, 2021
a62e1c7
Enabling the tests as installable version of cupy is now available on…
rraminen Jun 4, 2021
150dc1a
Skipped these tests as they fail on ROCm
rraminen Jun 4, 2021
dad3b5f
Merge remote-tracking branch 'upstream/master'
invalid-email-address Jun 7, 2021
c635d79
skipping other failing tests
rraminen Jun 9, 2021
d296665
Revert "Add patched CG headers to rocm install path (#9)"
rraminen Jun 23, 2021
f50fa7b
Revert "Update headers and include_dirs to enable transformer extensi…
rraminen Jun 23, 2021
2585f29
Added back the required code from the commits, 1c69737e1a8a8ae5ed9d29…
rraminen Jun 23, 2021
0be9645
Revert "Cooperative Groups workaround for transformer kernels extension"
rraminen Jun 23, 2021
f428da5
Added defined(__HIP_PLATFORM_HCC__) to kernels code
rraminen Jun 23, 2021
ed2ee34
Revert "Enable cooperative groups for ROCm"
rraminen Jun 23, 2021
742fd64
Enable cooperative groups for ROCm
rraminen Jun 23, 2021
1d20b14
Added CuPy installation from source
rraminen Jun 24, 2021
f6c79ae
Added h5py installation
rraminen Jun 24, 2021
0cf3306
Merge pull request #20 from rraminen/PR_Update_Dockerfile
jithunnair-amd Jun 28, 2021
81b744e
hip cooperative groups functionality for coalesced_group in fused_lam…
rraminen Jun 28, 2021
bf2979b
Merge pull request #21 from rraminen/PR_Revert_HIP_Cooperative_Groups…
jithunnair-amd Jun 29, 2021
5b0fac7
Revert "Merge pull request #21 from rraminen/PR_Revert_HIP_Cooperativ…
jithunnair-amd Jun 29, 2021
d98da5c
Merge pull request #19 from ROCmSoftwarePlatform/IFU-master-2021-06-07
jithunnair-amd Jun 30, 2021
1c9c561
IFU-master-2021-07-02
rraminen Jul 2, 2021
93ed86c
Included code for ROCm in include_paths()
rraminen Jul 2, 2021
e910bf2
Removed commented text
rraminen Jul 2, 2021
440d3bc
Merge pull request #18 from rraminen/rocm_PR_to_skip_failing_tests
jithunnair-amd Jul 6, 2021
a24b8ec
Added torch_available for ROCm specific check
rraminen Jul 6, 2021
536d0bb
Merge pull request #24 from rraminen/IFU-master-2021-07-02
jithunnair-amd Jul 6, 2021
15efc81
Make torch version check numeric
rraminen Jul 19, 2021
7c6bb76
Merge pull request #27 from rraminen/SWDEV_295133
jithunnair-amd Jul 20, 2021
c4ec23f
IFU-master-2021-07-26
rraminen Jul 26, 2021
cab7456
Enabled few tests which work on ROCm
rraminen Aug 4, 2021
32d448c
Merge pull request #30 from rraminen/IFU-master-2021-07-26
jithunnair-amd Aug 12, 2021
1d156bc
Merge pull request #31 from rraminen/PR_enable_megatron_tests
jithunnair-amd Aug 12, 2021
77bb30c
Merge remote-tracking branch 'upstream/master'
invalid-email-address Aug 12, 2021
4305aa7
Trigger Build
okakarpa Aug 16, 2021
d849d4b
Disabled send email
rraminen Aug 17, 2021
826c97b
Added ROCM_VERSION compiler directives to enable the build on ROCm 4.…
rraminen Aug 24, 2021
e58830d
Merge pull request #35 from rraminen/update_ifu_yml
jithunnair-amd Aug 24, 2021
902fabc
Merge pull request #32 from ROCmSoftwarePlatform/IFU-master-2021-08-12
jithunnair-amd Aug 24, 2021
93013ee
Using ROCM_MAJOR and ROCM_MINOR values from builder.py
rraminen Aug 25, 2021
b958aa7
Merge pull request #36 from rraminen/DeepSpeed_build_error_rocm4.4
jithunnair-amd Aug 25, 2021
ee9ef00
Updated DeepSpeedExamples commit (#37)
rraminen Sep 4, 2021
8d13a0c
Updated the DeepSpeedExamples submodule commit (#41)
rraminen Sep 15, 2021
9a1fdd7
Added 4-byte alignment on NCCL/RCCL (Cherry pick from upstream) (#42)
amathews-amd Sep 22, 2021
141ed70
IFU-master-2021-09-29
rraminen Sep 29, 2021
389cb5c
Trigger Build
rraminen Sep 30, 2021
f6d0071
Fix to enable DeepSpeed build on ROCm5.0.0 and above versions (#44)
rraminen Oct 1, 2021
9cf0419
Trigger Build
rraminen Oct 1, 2021
d4f0402
Trigger Build
rraminen Oct 1, 2021
5ab64f3
Trigger Build
rraminen Oct 3, 2021
03d9c9c
Trigger Build
rraminen Oct 4, 2021
7926893
Trigger Build
rraminen Oct 8, 2021
181a6b7
Trigger CI
rraminen Oct 29, 2021
f006d4f
Workaround to avoid ERROR: Package 'cupy' requires a different Python…
rraminen Nov 8, 2021
18940cb
Trigger CI
rraminen Nov 11, 2021
c9be5b8
Trigger CI
rraminen Nov 12, 2021
2db31ab
Trigger CI
rraminen Nov 15, 2021
2bc2f49
Merge pull request #43 from rraminen/IFU-master-2021-09-29
jithunnair-amd Nov 18, 2021
7097800
Updated DeepSpeedExamples commit
rraminen Nov 18, 2021
4e7b672
Merge pull request #47 from rraminen/DeepSpeed_Examples_commit
jithunnair-amd Nov 18, 2021
86b6f99
Resolved conflicts in setup.py, op_builder/__init__.py and pointed De…
rraminen Nov 23, 2021
2280803
Merge pull request #48 from rraminen/IFU-master-2021-11-23
jithunnair-amd Nov 23, 2021
536ee9c
requirements-rocm.txt is not required anymore
rraminen Nov 30, 2021
c7ba7ee
Merge pull request #49 from rraminen/requirements_fix
jithunnair-amd Nov 30, 2021
791ae24
Updated DeepSpeedExamples (#50)
rraminen Dec 7, 2021
bb4d5bf
THCudaCheck is deprecated (#51)
rraminen Dec 17, 2021
6f7c5c2
THCGeneral.h header file is deprecated (#54)
rraminen Feb 8, 2022
09f8f1f
Megatron-LM GPT2 ZeRO2 model on 16 GPUs (#55)
rraminen Mar 1, 2022
1614e98
Install MLNX_OFED to make best use of HPE-MI100-XGMI systems
Mar 29, 2022
63 changes: 63 additions & 0 deletions .github/workflows/ifu.yml
@@ -0,0 +1,63 @@
name: IntegrateFromUpstream
on:
  # schedule:
  #   # verified via crontab.guru website. “At 06:55 on Monday.”
  #   - cron: '55 6 * * 1'
  workflow_dispatch:
    inputs:
      message:
        description: 'Reason for manual trigger'
        required: false
        default: 'refresh branch'
jobs:
  IntegrateFromUpstream:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Get Current Date
        id: date
        run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
      - name: Extract branch name
        id: extract_branch
        shell: bash
        run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
      - name: Fetch and Merge
        id: fetch_and_merge
        run: |
          echo "Reason for trigger: ${{ github.event.inputs.message }}"
          echo "Actor for trigger: ${{ github.actor }}"
          git config user.name github-actions
          git config user.email github-actions@github.com
          git remote add upstream https://github.com/microsoft/DeepSpeed
          git fetch upstream master
          git merge upstream/master
          # Since we use our own fork of DeepSpeedExamples, ignore theirs
          git checkout HEAD DeepSpeedExamples
      - name: Create Pull Request
        id: create_pull_request
        uses: jithunnair-amd/create-pull-request@v3
        with:
          # token: ${{ secrets.PAT }}
          branch: IFU-${{ steps.extract_branch.outputs.branch }}-${{ steps.date.outputs.date }}
          title: IFU-${{ steps.extract_branch.outputs.branch }}-${{ steps.date.outputs.date }}
          assignees: rraminen
          reviewers: jithunnair-amd
          delete-branch: true
      # - name: Send email
      #   uses: jithunnair-amd/action-send-mail@v3.1.0
      #   if: always()
      #   with:
      #     server_address: smtp.gmail.com
      #     server_port: 465
      #     secure: true
      #     username: ${{ secrets.GMAIL_USERNAME }}
      #     password: ${{ secrets.GMAIL_PASSWORD }}
      #     subject: IFU to ${{ steps.extract_branch.outputs.branch }} branch of ${{ github.repository }}
      #     to: Jithun.Nair@amd.com, RamyaSai.Ramineni@amd.com
      #     from: ${{ secrets.GMAIL_USERNAME }}
      #     html_body: |
      #       <b>Fetch and Merge</b>: ${{ steps.fetch_and_merge.outcome }} <br/>
      #       <b>Create Pull Request</b>: ${{ steps.create_pull_request.outcome }} <br/>"""
      #       <b>Pull request</b>: <a href="${{ steps.create_pull_request.outputs.pull-request-url }}">${{ steps.create_pull_request.outputs.pull-request-url }}</a> <br/>
3 changes: 1 addition & 2 deletions .gitmodules
@@ -1,4 +1,3 @@
[submodule "DeepSpeedExamples"]
path = DeepSpeedExamples
url = https://github.com/microsoft/DeepSpeedExamples
branch = master
url = https://github.com/ROCmSoftwarePlatform/DeepSpeedExamples.git
2 changes: 1 addition & 1 deletion DeepSpeedExamples
18 changes: 18 additions & 0 deletions csrc/includes/cublas_wrappers.h
@@ -5,7 +5,9 @@
#include <cuda.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#ifndef __HIP_PLATFORM_HCC__
#include <mma.h>
#endif
#include <stdio.h>

int cublas_gemm_ex(cublasHandle_t handle,
@@ -19,7 +21,11 @@ int cublas_gemm_ex(cublasHandle_t handle,
const float* A,
const float* B,
float* C,
#ifdef __HIP_PLATFORM_HCC__
rocblas_gemm_algo algo = rocblas_gemm_algo_standard);
#else
cublasGemmAlgo_t algo = CUBLAS_GEMM_DEFAULT);
#endif

int cublas_gemm_ex(cublasHandle_t handle,
cublasOperation_t transa,
@@ -32,7 +38,11 @@ int cublas_gemm_ex(cublasHandle_t handle,
const __half* A,
const __half* B,
__half* C,
#ifdef __HIP_PLATFORM_HCC__
rocblas_gemm_algo algo = rocblas_gemm_algo_standard);
#else
cublasGemmAlgo_t algo = CUBLAS_GEMM_DEFAULT_TENSOR_OP);
#endif

int cublas_strided_batched_gemm(cublasHandle_t handle,
int m,
@@ -49,7 +59,11 @@ int cublas_strided_batched_gemm(cublasHandle_t handle,
int stride_B,
int stride_C,
int batch,
#ifdef __HIP_PLATFORM_HCC__
rocblas_gemm_algo algo = rocblas_gemm_algo_standard);
#else
cublasGemmAlgo_t algo = CUBLAS_GEMM_DEFAULT);
#endif

int cublas_strided_batched_gemm(cublasHandle_t handle,
int m,
@@ -66,4 +80,8 @@ int cublas_strided_batched_gemm(cublasHandle_t handle,
int stride_B,
int stride_C,
int batch,
#ifdef __HIP_PLATFORM_HCC__
rocblas_gemm_algo algo = rocblas_gemm_algo_standard);
#else
cublasGemmAlgo_t algo = CUBLAS_GEMM_DEFAULT_TENSOR_OP);
#endif
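The guards above let one set of GEMM wrapper declarations compile against either cuBLAS or rocBLAS: the PR keeps the cublas* spellings and relies on hipify to rewrite them at build time, with __HIP_PLATFORM_HCC__ switching the algorithm-selector type and its default value. A minimal standalone sketch of the same idea follows; the blas_handle_t/blas_algo_t aliases and the gemm_ex_sketch name are illustrative only and are not introduced by the PR.

#ifdef __HIP_PLATFORM_HCC__
#include <rocblas.h>
typedef rocblas_handle blas_handle_t;       // illustrative alias, not in the PR
typedef rocblas_gemm_algo blas_algo_t;
#define BLAS_ALGO_DEFAULT rocblas_gemm_algo_standard
#else
#include <cublas_v2.h>
typedef cublasHandle_t blas_handle_t;       // illustrative alias, not in the PR
typedef cublasGemmAlgo_t blas_algo_t;
#define BLAS_ALGO_DEFAULT CUBLAS_GEMM_DEFAULT
#endif

// One declaration then serves both platforms; only the algorithm-selector
// type and its default value differ between CUDA and ROCm builds.
int gemm_ex_sketch(blas_handle_t handle,
                   int m,
                   int n,
                   int k,
                   const float* A,
                   const float* B,
                   float* C,
                   blas_algo_t algo = BLAS_ALGO_DEFAULT);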
4 changes: 4 additions & 0 deletions csrc/includes/custom_cuda_layers.h
@@ -5,7 +5,11 @@
#include <stdio.h>
#include <stdlib.h>

#ifdef __HIP_PLATFORM_HCC__
#include <hip/hip_cooperative_groups.h>
#else
#include <cooperative_groups.h>
#endif
#include <curand_kernel.h>

#include "context.h"
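On ROCm the cooperative-groups API is provided by hip/hip_cooperative_groups.h rather than cooperative_groups.h, but the cooperative_groups namespace it exposes covers the basic thread_block operations, so kernels written against that namespace build on both platforms once the include is guarded as above. A small illustrative kernel, not from the PR and assuming a launch with blockDim.x == 256, that sticks to that common subset:

#ifdef __HIP_PLATFORM_HCC__
#include <hip/hip_cooperative_groups.h>
#else
#include <cooperative_groups.h>
#endif

namespace cg = cooperative_groups;

// Per-block sum reduction; the scratch size must match the launch block size.
__global__ void block_sum_sketch(const float* in, float* out)
{
    __shared__ float scratch[256];
    cg::thread_block b = cg::this_thread_block();

    const unsigned tid = b.thread_rank();
    scratch[tid] = in[blockIdx.x * b.size() + tid];
    b.sync();  // same effect as __syncthreads()

    for (unsigned stride = b.size() / 2; stride > 0; stride /= 2) {
        if (tid < stride) scratch[tid] += scratch[tid + stride];
        b.sync();
    }
    if (tid == 0) out[blockIdx.x] = scratch[0];
}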
12 changes: 12 additions & 0 deletions csrc/includes/feed_forward.h
@@ -43,7 +43,11 @@ class FeedForward {
weights,
input_ptr,
out,
#ifdef __HIP_PLATFORM_HCC__
rocblas_gemm_algo(config_.gemm_algos[0]));
#else
cublasGemmAlgo_t(config_.gemm_algos[0]));
#endif
}
void Backward(int bsz,
const T* out_grad,
@@ -68,7 +72,11 @@ class FeedForward {
input_ptr,
out_grad,
weights_grad,
#ifdef __HIP_PLATFORM_HCC__
rocblas_gemm_algo(config_.gemm_algos[1]));
#else
cublasGemmAlgo_t(config_.gemm_algos[1]));
#endif

cublas_gemm_ex(_cublasHandle,
CUBLAS_OP_N,
@@ -81,7 +89,11 @@ class FeedForward {
weights,
out_grad,
inp_grad_out,
#ifdef __HIP_PLATFORM_HCC__
rocblas_gemm_algo(config_.gemm_algos[2]));
#else
cublasGemmAlgo_t(config_.gemm_algos[2]));
#endif

launch_fuse_transpose_bias_kernel<T>(out_grad, bias_grad, bsz, config_.outputSize, stream);
}
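Each GEMM call in FeedForward now repeats the same four-line #ifdef just to cast the stored integer algorithm id to the platform-specific enum. A hypothetical helper, not part of this PR and assuming the cuBLAS/rocBLAS headers are already included as they are in these files, could confine that branch to one place; the gemm_algo_t and to_gemm_algo names below are invented for illustration.

#ifdef __HIP_PLATFORM_HCC__
using gemm_algo_t = rocblas_gemm_algo;
#else
using gemm_algo_t = cublasGemmAlgo_t;
#endif

// Convert the int stored in config_.gemm_algos[] to the platform's enum type.
static inline gemm_algo_t to_gemm_algo(int algo_index)
{
    return static_cast<gemm_algo_t>(algo_index);
}

With such a helper a call site would read cublas_gemm_ex(_cublasHandle, ..., to_gemm_algo(config_.gemm_algos[0])); and the preprocessor branching would live in a single header.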
36 changes: 36 additions & 0 deletions csrc/includes/gemm_test.h
@@ -2,7 +2,9 @@
#pragma once

#include <cuda_fp16.h>
#ifndef __HIP_PLATFORM_HCC__
#include <cuda_profiler_api.h>
#endif
#include <array>
#include <cstdio>
#include <cstdlib>
@@ -58,7 +60,11 @@ class GemmTest {
B,
A,
C,
#ifdef __HIP_PLATFORM_HCC__
static_cast<rocblas_gemm_algo>(algo));
#else
static_cast<cublasGemmAlgo_t>(algo));
#endif
});

int algo_bw1 = Run(loops, [=](int algo) {
@@ -73,7 +79,11 @@ class GemmTest {
A,
C,
B,
#ifdef __HIP_PLATFORM_HCC__
static_cast<rocblas_gemm_algo>(algo));
#else
static_cast<cublasGemmAlgo_t>(algo));
#endif
});

int algo_bw2 = Run(loops, [=](int algo) {
@@ -88,7 +98,11 @@ class GemmTest {
B,
C,
A,
#ifdef __HIP_PLATFORM_HCC__
static_cast<rocblas_gemm_algo>(algo));
#else
static_cast<cublasGemmAlgo_t>(algo));
#endif
});

return std::array<int, 3>({algo_fw, algo_bw1, algo_bw2});
@@ -100,8 +114,13 @@ class GemmTest {
float fast_latency = (std::numeric_limits<float>::max)();
int fast_algo = 0;

#ifdef __HIP_PLATFORM_HCC__
for (int algo = (int)rocblas_gemm_algo_standard;
algo <= (int)rocblas_gemm_algo_standard;
#else
for (int algo = (int)CUBLAS_GEMM_DEFAULT_TENSOR_OP;
algo <= (int)CUBLAS_GEMM_ALGO15_TENSOR_OP;
#endif
algo++) {
int warm_up = 5;
for (int i = 0; i < warm_up; ++i) f(algo);
@@ -186,7 +205,11 @@ class StridedGemmTest {
stride_b,
stride_c,
bsz,
#ifdef __HIP_PLATFORM_HCC__
static_cast<rocblas_gemm_algo>(algo));
#else
static_cast<cublasGemmAlgo_t>(algo));
#endif
});

int algo_bw1 = Run(loops, [=](int algo) {
@@ -216,7 +239,11 @@ class StridedGemmTest {
stride_b,
stride_c,
bsz,
#ifdef __HIP_PLATFORM_HCC__
static_cast<rocblas_gemm_algo>(algo));
#else
static_cast<cublasGemmAlgo_t>(algo));
#endif
});

int algo_bw2 = Run(loops, [=](int algo) {
@@ -243,7 +270,11 @@ class StridedGemmTest {
stride_b,
stride_c,
bsz,
#ifdef __HIP_PLATFORM_HCC__
static_cast<rocblas_gemm_algo>(algo));
#else
static_cast<cublasGemmAlgo_t>(algo));
#endif
});

return std::array<int, 3>({algo_fw, algo_bw1, algo_bw2});
@@ -255,8 +286,13 @@ class StridedGemmTest {
float fast_latency = (std::numeric_limits<float>::max)();
int fast_algo = 0;

#ifdef __HIP_PLATFORM_HCC__
for (int algo = (int)rocblas_gemm_algo_standard;
algo <= (int)rocblas_gemm_algo_standard;
#else
for (int algo = (int)CUBLAS_GEMM_DEFAULT_TENSOR_OP;
algo <= (int)CUBLAS_GEMM_ALGO15_TENSOR_OP;
#endif
algo++) {
int warm_up = 5;
for (int i = 0; i < warm_up; ++i) f(algo);
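The guards in GemmTest and StridedGemmTest collapse the autotuning range to a single candidate on ROCm, because rocBLAS exposes only rocblas_gemm_algo_standard, while cuBLAS offers CUBLAS_GEMM_DEFAULT_TENSOR_OP through CUBLAS_GEMM_ALGO15_TENSOR_OP. A simplified sketch of that search loop, assuming a caller-supplied functor f(int algo) that launches one synchronized GEMM with the given algorithm id (the PR's actual Run() helper does the timing with its own machinery):

#ifdef __HIP_PLATFORM_HCC__
#include <rocblas.h>
#else
#include <cublas_v2.h>
#endif
#include <chrono>
#include <limits>

template <typename F>
int pick_fastest_algo(F f, int loops)
{
    float fast_latency = (std::numeric_limits<float>::max)();
    int fast_algo = 0;

#ifdef __HIP_PLATFORM_HCC__
    const int first_algo = (int)rocblas_gemm_algo_standard;   // single candidate on ROCm
    const int last_algo = (int)rocblas_gemm_algo_standard;
#else
    const int first_algo = (int)CUBLAS_GEMM_DEFAULT_TENSOR_OP;
    const int last_algo = (int)CUBLAS_GEMM_ALGO15_TENSOR_OP;
#endif

    for (int algo = first_algo; algo <= last_algo; algo++) {
        for (int i = 0; i < 5; ++i) f(algo);                   // warm-up runs

        auto start = std::chrono::steady_clock::now();
        for (int i = 0; i < loops; ++i) f(algo);
        auto stop = std::chrono::steady_clock::now();

        // Average per-call latency in milliseconds; keep the fastest algorithm.
        float latency =
            std::chrono::duration<float, std::milli>(stop - start).count() / loops;
        if (latency < fast_latency) {
            fast_latency = latency;
            fast_algo = algo;
        }
    }
    return fast_algo;
}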
4 changes: 4 additions & 0 deletions csrc/includes/general_kernels.h
@@ -3,7 +3,11 @@
#include <stdio.h>
#include <stdlib.h>

#ifdef __HIP_PLATFORM_HCC__
#include <hip/hip_cooperative_groups.h>
#else
#include <cooperative_groups.h>
#endif
#include <curand_kernel.h>

#include "context.h"