2 changes: 1 addition & 1 deletion 3rdparty/hipify_torch
3 changes: 2 additions & 1 deletion build_tools/utils.py
@@ -535,7 +535,8 @@ def hipify(base_dir, src_dir, sources, include_dirs):
extra_files=[],
is_pytorch_extension=True,
hipify_extra_files_only=False,
-show_detailed=False)
+show_detailed=False,
+no_math_replace=True)
Collaborator:

I didn't find this no_math_replace option in hipify_torch. Could you post a reference URL for it?

Contributor Author:

This PR requires the hipify_torch commit that introduces no_math_replace:

ROCm/hipify_torch#82

Collaborator:

Alternatively, we can use the commit from your hipify PR branch for the time being.


# Because hipify output_directory == project_directory
# Original sources list may contain previous hipify results that end up as duplicated entries
1 change: 1 addition & 0 deletions tests/cpp/operator/CMakeLists.txt
@@ -60,6 +60,7 @@ else()
hipify(CUDA_SOURCE_DIR ${cuda_source_dir}
HEADER_INCLUDE_DIR ${header_include_dir}
CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json"
+NO_MATH_REPLACE
)
get_hipified_list("${test_cuda_sources}" test_hip_sources)
message("${message_line}")
10 changes: 3 additions & 7 deletions tests/cpp/operator/test_cast_mxfp8.cu
@@ -51,6 +51,9 @@ void scale_block(const ProcessingMethod processing_method,
const size_t j_min,
const size_t j_max,
const size_t cols) {
+#ifdef __HIP_PLATFORM_AMD__
+using std::isnan, std::isinf;
+#endif
float amax = 0.0f;

// Find the absolute maximum value in the block
@@ -71,17 +74,10 @@
elt *= static_cast<float>(grad[idx]);
}
dbias[j] += elt;
-#ifndef __HIP_PLATFORM_AMD__
if (isinf(elt) || isnan(elt)) {
continue;
}
amax = std::max(amax, std::abs(elt));
-#else // #ifdef __HIP_PLATFORM_AMD__
-if (std::isinf(elt) || std::isnan(elt)) {
-continue;
-}
-amax = fmaxf(amax, fabsf(elt));
-#endif // #ifdef __HIP_PLATFORM_AMD__
}
}
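Aside on the new using-declaration: it is what lets the single unqualified isinf/isnan spelling compile identically on both platforms, so the old #ifdef branches collapse into one body. A minimal host-side sketch of the pattern (illustrative, not the test's actual code):

#include <algorithm>
#include <cmath>
#include <iostream>

// A function-scope using-declaration makes the unqualified isinf/isnan calls
// bind to the std:: overloads on any platform, including where the HIP
// headers also expose global-namespace variants.
float block_amax(const float* data, int n) {
  using std::isnan, std::isinf;  // C++17 multi-name using-declaration
  float amax = 0.0f;
  for (int i = 0; i < n; ++i) {
    const float elt = data[i];
    if (isinf(elt) || isnan(elt)) {
      continue;  // skip non-finite values, as scale_block does
    }
    amax = std::max(amax, std::abs(elt));
  }
  return amax;
}

int main() {
  const float v[] = {1.0f, -3.5f, NAN, INFINITY, 2.0f};
  std::cout << block_amax(v, 5) << "\n";  // prints 3.5
}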

19 changes: 6 additions & 13 deletions tests/cpp/operator/test_cublaslt_gemm.cu
@@ -93,7 +93,7 @@ void compute_ref(
// update ref_d_amax if in fp8
DType dtype = TypeInfo<D_Type>::dtype;
if(isFp8Type(dtype)){
-ref_d_amax = std::max<float>(ref_d_amax, std::fabs(val));
+ref_d_amax = std::max(ref_d_amax, std::fabs(val));
}
}
}
@@ -127,10 +127,8 @@ void compute_mxfp8_ref(
for(size_t kk = 0; kk < k; kk++){
size_t a_idx = transa ? (ii*k + kk) : (kk*m + ii);
size_t b_idx = transb ? (kk*n + jj) : (jj*k + kk);
-float a_scale_inv_val = (float)std::pow(2,
-    a_scale_inv_data[transa ? a_idx/32 : (kk/32 * m + ii)] - 127);
-float b_scale_inv_val = (float)std::pow(2,
-    b_scale_inv_data[transb ? (kk/32 * n + jj) : b_idx/32] - 127);
+float a_scale_inv_val = std::exp2f(a_scale_inv_data[transa ? a_idx/32 : (kk/32 * m + ii)] - 127);
+float b_scale_inv_val = std::exp2f(b_scale_inv_data[transb ? (kk/32 * n + jj) : b_idx/32] - 127);
val += a_scale_inv_val * (float)a_data[a_idx] * b_scale_inv_val * (float)b_data[b_idx];
}
if(bias_data){
@@ -144,7 +142,7 @@
// update ref_d_amax if in fp8
DType dtype = TypeInfo<D_Type>::dtype;
if(isFp8Type(dtype)){
-ref_d_amax = std::max<float>(ref_d_amax, std::fabs(val));
+ref_d_amax = std::max(ref_d_amax, std::fabs(val));
}
}
}
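Aside on the std::exp2f change above: the per-block scale bytes are biased power-of-two exponents (bias 127, as in the code), so each scale decodes as 2^(stored - 127), and std::exp2f computes that directly instead of round-tripping through double-precision std::pow. A minimal sketch; decode_scale is a hypothetical helper, not a function from the tests:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode a stored scale byte: value = 2^(stored - 127).
float decode_scale(uint8_t stored) {
  return std::exp2f(static_cast<int>(stored) - 127);
}

int main() {
  std::printf("%g %g %g\n",
              decode_scale(127),   // 2^0  = 1
              decode_scale(130),   // 2^3  = 8
              decode_scale(120));  // 2^-7 = 0.0078125
}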
@@ -177,16 +175,11 @@ std::pair<double, double> getTestTolerances(const DType type, bool use_fp8, bool
// relax for certain FP8 gemm with hipblaslt
if (use_mxfp8) {
atol = 5e-4;
-/*During hipifying std::max is converted to ::max
-  to w/a HIP bug with using std:: in device functions.
-  W/o explicit <double>, compiler uses non-templated int method variant from HIP headers
-  TODO: remove when switch to new hipify version after fixing HIP bug */
-rtol = std::max<double>(rtol, 1e-3);
+rtol = std::max(rtol, 1e-3);
}
else if (use_fp8) {
atol = 1e-3;
-//TODO: remove <double> (see comment above)
-rtol = std::max<double>(rtol, 5e-3);
+rtol = std::max(rtol, 5e-3);
}
else if (type == DType::kBFloat16) {
//relax for certain prime number TN gemm
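Aside on the deleted workaround comment: a self-contained host-side demo of the overload pitfall it describes. The global max below is a hypothetical stand-in for the non-templated int variant that HIP's headers expose; the real one lives in HIP, not here:

#include <algorithm>
#include <iostream>

// Stand-in for the non-templated int overload in the global namespace.
inline int max(int a, int b) { return a > b ? a : b; }

int main() {
  double rtol = 5e-5;

  // Old hipify rewrote std::max to ::max; overload resolution then picked
  // the int variant, silently truncating both arguments to 0.
  double bad = ::max(rtol, 1e-3);                    // 0, not 0.001

  // The workaround forced the function template with an explicit argument:
  double workaround = std::max<double>(rtol, 1e-3);  // 0.001

  // With NO_MATH_REPLACE the call stays std::max, deduction works, and the
  // explicit <double> can be dropped, as this PR does:
  double now = std::max(rtol, 1e-3);                 // 0.001

  std::cout << bad << " " << workaround << " " << now << "\n";  // 0 0.001 0.001
}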
25 changes: 0 additions & 25 deletions tests/cpp/test_common.cu
@@ -547,22 +547,9 @@ void compareResults_sequential(const std::string &name, const Tensor &test,
const double mean = (t + r) / 2;
const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6);
const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6);
-#ifndef __HIP_PLATFORM_AMD__
const double cast_mean_p = static_cast<double>(static_cast<T>(mean_p));
const double cast_mean_m = static_cast<double>(static_cast<T>(mean_m));
assertion = !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r));
-#else
-const double cast_mean_p =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_p))));
-const double cast_mean_m =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_m))));
-/*During hipifying std::max and std::min are converted to ::max and ::min
-  to w/a HIP bug with using std:: in device functions.
-  W/o explicit <double>, compiler uses non-templated int method variant from HIP headers
-  TODO: remove when switch to new hipify version after fixing HIP bug */
-assertion =
-    !(cast_mean_m == std::min<double>(t, r) && cast_mean_p == std::max<double>(t, r));
-#endif
}
std::string direction = rowwise ? "rowwise" : "columnwise";
ASSERT_FALSE(assertion) << "Error in tensor " << name << " in "
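Aside on the surviving check: it accepts t and r when they are adjacent representable values of the storage type T, by nudging their midpoint by a relative 1e-6 in each direction and rounding each nudge through T; the two round trips must land on min(t, r) and max(t, r). A sketch, assuming a C++23 compiler with <stdfloat>; adjacent_in_T is a hypothetical name:

#include <algorithm>
#include <cstdio>
#include <stdfloat>  // C++23; assumes std::float16_t support (e.g. GCC 13+)

// Accept t and r when they are neighboring representable values of T.
template <typename T>
bool adjacent_in_T(double t, double r) {
  const double mean = (t + r) / 2;
  const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6);
  const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6);
  const double cast_p = static_cast<double>(static_cast<T>(mean_p));
  const double cast_m = static_cast<double>(static_cast<T>(mean_m));
  return cast_m == std::min(t, r) && cast_p == std::max(t, r);
}

int main() {
  // 1.0 and the next FP16 value above it (1 + 2^-10) are neighbors, so the
  // check accepts them; 1.0 vs 1.5 are not neighbors, so it rejects them.
  std::printf("%d %d\n",
              adjacent_in_T<std::float16_t>(1.0, 1.0009765625),   // 1
              adjacent_in_T<std::float16_t>(1.0, 1.5));           // 0
}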
@@ -603,21 +590,9 @@ static size_t getFirstMismatchIdx(const DType data_type, const T* test_data, con
const double mean = (t + r) / 2;
const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6);
const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6);
-#ifndef __HIP_PLATFORM_AMD__
const double cast_mean_p = static_cast<double>(static_cast<T>(mean_p));
const double cast_mean_m = static_cast<double>(static_cast<T>(mean_m));
assertion = !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r));
-#else
-const double cast_mean_p =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_p))));
-const double cast_mean_m =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_m))));
-/*During hipifying std::max and std::min are converted to ::max and ::min
-  to w/a HIP bug with using std:: in device functions.
-  W/o explicit <double>, compiler uses non-templated int method variant from HIP headers
-  TODO: remove when switch to new hipify version after fixing HIP bug */
-assertion = !(cast_mean_m == std::min<double>(t, r) && cast_mean_p == std::max<double>(t, r));
-#endif
}
if (assertion && i < first_mismatch_idx) {
first_mismatch_idx = i;
1 change: 1 addition & 0 deletions transformer_engine/common/CMakeLists.txt
@@ -239,6 +239,7 @@ else()
IGNORES "*/aotriton/*"
IGNORES "*/ck_fused_attn/*"
CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json"
+NO_MATH_REPLACE
)
get_hipified_list("${transformer_engine_SOURCES}" te_hip_sources)
message("${message_line}")