From f38c0cf9db94cb1cf291a2437a2162a5c3e0b8a9 Mon Sep 17 00:00:00 2001 From: alextmagro Date: Tue, 21 Oct 2025 12:35:38 -0500 Subject: [PATCH 1/2] Remove HIP macros around std:: math functions --- build_tools/utils.py | 3 ++- tests/cpp/operator/CMakeLists.txt | 1 + tests/cpp/operator/test_cast_mxfp8.cu | 10 +++------- tests/cpp/operator/test_cublaslt_gemm.cu | 19 ++++++------------ tests/cpp/test_common.cu | 25 ------------------------ transformer_engine/common/CMakeLists.txt | 1 + 6 files changed, 13 insertions(+), 46 deletions(-) diff --git a/build_tools/utils.py b/build_tools/utils.py index 9414d778c..f848bad74 100644 --- a/build_tools/utils.py +++ b/build_tools/utils.py @@ -535,7 +535,8 @@ def hipify(base_dir, src_dir, sources, include_dirs): extra_files=[], is_pytorch_extension=True, hipify_extra_files_only=False, - show_detailed=False) + show_detailed=False, + no_math_replace=True) # Because hipify output_directory == project_directory # Original sources list may contain previous hipifying results that ends up with duplicated entries diff --git a/tests/cpp/operator/CMakeLists.txt b/tests/cpp/operator/CMakeLists.txt index e2a1938ce..9d3ff719b 100644 --- a/tests/cpp/operator/CMakeLists.txt +++ b/tests/cpp/operator/CMakeLists.txt @@ -60,6 +60,7 @@ else() hipify(CUDA_SOURCE_DIR ${cuda_source_dir} HEADER_INCLUDE_DIR ${header_include_dir} CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json" + NO_MATH_REPLACE ) get_hipified_list("${test_cuda_sources}" test_hip_sources) message("${message_line}") diff --git a/tests/cpp/operator/test_cast_mxfp8.cu b/tests/cpp/operator/test_cast_mxfp8.cu index 33a9b8629..814a2ffde 100644 --- a/tests/cpp/operator/test_cast_mxfp8.cu +++ b/tests/cpp/operator/test_cast_mxfp8.cu @@ -51,6 +51,9 @@ void scale_block(const ProcessingMethod processing_method, const size_t j_min, const size_t j_max, const size_t cols) { +#ifdef __HIP_PLATFORM_AMD__ + using std::isnan, std::isinf; +#endif float amax = 0.0f; // Find the absolute maximum value in the block @@ -71,17 +74,10 @@ void scale_block(const ProcessingMethod processing_method, elt *= static_cast(grad[idx]); } dbias[j] += elt; -#ifndef __HIP_PLATFORM_AMD__ if (isinf(elt) || isnan(elt)) { continue; } amax = std::max(amax, std::abs(elt)); -#else - if (std::isinf(elt) || std::isnan(elt)) { - continue; - } - amax = fmaxf(amax, fabsf(elt)); -#endif } } diff --git a/tests/cpp/operator/test_cublaslt_gemm.cu b/tests/cpp/operator/test_cublaslt_gemm.cu index b731cc701..395c4f4fd 100644 --- a/tests/cpp/operator/test_cublaslt_gemm.cu +++ b/tests/cpp/operator/test_cublaslt_gemm.cu @@ -93,7 +93,7 @@ void compute_ref( // update ref_d_amax if in fp8 DType dtype = TypeInfo::dtype; if(isFp8Type(dtype)){ - ref_d_amax = std::max(ref_d_amax, std::fabs(val)); + ref_d_amax = std::max(ref_d_amax, std::fabs(val)); } } } @@ -127,10 +127,8 @@ void compute_mxfp8_ref( for(size_t kk = 0; kk < k; kk++){ size_t a_idx = transa ? (ii*k + kk) : (kk*m + ii); size_t b_idx = transb ? (kk*n + jj) : (jj*k + kk); - float a_scale_inv_val = (float)std::pow(2, - a_scale_inv_data[transa ? a_idx/32 : (kk/32 * m + ii)] - 127); - float b_scale_inv_val = (float)std::pow(2, - b_scale_inv_data[transb ? (kk/32 * n + jj) : b_idx/32] - 127); + float a_scale_inv_val = std::exp2f(a_scale_inv_data[transa ? a_idx/32 : (kk/32 * m + ii)] - 127); + float b_scale_inv_val = std::exp2f(b_scale_inv_data[transb ? (kk/32 * n + jj) : b_idx/32] - 127); val += a_scale_inv_val * (float)a_data[a_idx] * b_scale_inv_val * (float)b_data[b_idx]; } if(bias_data){ @@ -144,7 +142,7 @@ void compute_mxfp8_ref( // update ref_d_amax if in fp8 DType dtype = TypeInfo::dtype; if(isFp8Type(dtype)){ - ref_d_amax = std::max(ref_d_amax, std::fabs(val)); + ref_d_amax = std::max(ref_d_amax, std::fabs(val)); } } } @@ -177,16 +175,11 @@ std::pair getTestTolerances(const DType type, bool use_fp8, bool // relax for certain FP8 gemm with hipblaslt if (use_mxfp8) { atol = 5e-4; - /*During hipifying std::max is converted to ::max - to w/a HIP bug with using std:: in device functions. - W/o explicitlit , compiler uses non-templated int method variant from HIP headers - TODO: remove when switch to new hipify version after fixing HIP bug */ - rtol = std::max(rtol, 1e-3); + rtol = std::max(rtol, 1e-3); } else if (use_fp8) { atol = 1e-3; - //TODO: remove (see comment above) - rtol = std::max(rtol, 5e-3); + rtol = std::max(rtol, 5e-3); } else if (type == DType::kBFloat16) { //relax for certain prime number TN gemm diff --git a/tests/cpp/test_common.cu b/tests/cpp/test_common.cu index 28a122b13..5dc0e27e0 100644 --- a/tests/cpp/test_common.cu +++ b/tests/cpp/test_common.cu @@ -547,22 +547,9 @@ void compareResults_sequential(const std::string &name, const Tensor &test, const double mean = (t + r) / 2; const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6); const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6); -#ifndef __HIP_PLATFORM_AMD__ const double cast_mean_p = static_cast(static_cast(mean_p)); const double cast_mean_m = static_cast(static_cast(mean_m)); assertion = !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r)); -#else - const double cast_mean_p = - static_cast(static_cast(static_cast(static_cast(mean_p)))); - const double cast_mean_m = - static_cast(static_cast(static_cast(static_cast(mean_m)))); - /*During hipifying std::max and std::min are converted to ::max and ::min - to w/a HIP bug with using std:: in device functions. - W/o explicitlit , compiler uses non-templated int method variant from HIP headers - TODO: remove when switch to new hipify version after fixing HIP bug */ - assertion = - !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r)); -#endif } std::string direction = rowwise ? "rowwise" : "columnwise"; ASSERT_FALSE(assertion) << "Error in tensor " << name << " in " @@ -603,21 +590,9 @@ static size_t getFirstMismatchIdx(const DType data_type, const T* test_data, con const double mean = (t + r) / 2; const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6); const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6); -#ifndef __HIP_PLATFORM_AMD__ const double cast_mean_p = static_cast(static_cast(mean_p)); const double cast_mean_m = static_cast(static_cast(mean_m)); assertion = !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r)); -#else - const double cast_mean_p = - static_cast(static_cast(static_cast(static_cast(mean_p)))); - const double cast_mean_m = - static_cast(static_cast(static_cast(static_cast(mean_m)))); - /*During hipifying std::max and std::min are converted to ::max and ::min - to w/a HIP bug with using std:: in device functions. - W/o explicitlit , compiler uses non-templated int method variant from HIP headers - TODO: remove when switch to new hipify version after fixing HIP bug */ - assertion = !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r)); -#endif } if (assertion && i < first_mismatch_idx) { first_mismatch_idx = i; diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index f70c9f8bb..34a89dd27 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -239,6 +239,7 @@ else() IGNORES "*/aotriton/*" IGNORES "*/ck_fused_attn/*" CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json" + NO_MATH_REPLACE ) get_hipified_list("${transformer_engine_SOURCES}" te_hip_sources) message("${message_line}") From 9ff0569ca82de6e8f9e8a84855a8072c53711c62 Mon Sep 17 00:00:00 2001 From: alextmagro Date: Tue, 28 Oct 2025 16:34:02 -0500 Subject: [PATCH 2/2] hipify-torch commit --- 3rdparty/hipify_torch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/hipify_torch b/3rdparty/hipify_torch index 12ac3f401..3456cd19d 160000 --- a/3rdparty/hipify_torch +++ b/3rdparty/hipify_torch @@ -1 +1 @@ -Subproject commit 12ac3f401261ffa331a4000626a333727f06a0d8 +Subproject commit 3456cd19d4eb5e469317bfcfae1a89b7ab70f6c2