2 changes: 1 addition & 1 deletion 3rdparty/hipify_torch
3 changes: 2 additions & 1 deletion build_tools/utils.py
@@ -535,7 +535,8 @@ def hipify(base_dir, src_dir, sources, include_dirs):
extra_files=[],
is_pytorch_extension=True,
hipify_extra_files_only=False,
-show_detailed=False)
+show_detailed=False,
+no_math_replace=True)
Collaborator:

I didn't find this no_math_replace option in hipify_torch. Could you post a reference URL for it?

Contributor Author:

This PR requires the hipify_torch commit that introduces no_math_replace:

ROCm/hipify_torch#82

Collaborator:

Alternatively, we can use the commit from your hipify PR branch for the time being.


# Because hipify output_directory == project_directory
# Original sources list may contain previous hipify results that end up as duplicated entries
1 change: 1 addition & 0 deletions tests/cpp/operator/CMakeLists.txt
@@ -60,6 +60,7 @@ else()
hipify(CUDA_SOURCE_DIR ${cuda_source_dir}
HEADER_INCLUDE_DIR ${header_include_dir}
CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json"
+NO_MATH_REPLACE
)
get_hipified_list("${test_cuda_sources}" test_hip_sources)
message("${message_line}")
10 changes: 3 additions & 7 deletions tests/cpp/operator/test_cast_mxfp8.cu
@@ -51,6 +51,9 @@ void scale_block(const ProcessingMethod processing_method,
const size_t j_min,
const size_t j_max,
const size_t cols) {
+#ifdef __HIP_PLATFORM_AMD__
+using std::isnan, std::isinf;
+#endif
float amax = 0.0f;

// Find the absolute maximum value in the block
@@ -71,17 +74,10 @@
elt *= static_cast<float>(grad[idx]);
}
dbias[j] += elt;
-#ifndef __HIP_PLATFORM_AMD__
if (isinf(elt) || isnan(elt)) {
continue;
}
amax = std::max(amax, std::abs(elt));
-#else // #ifdef __HIP_PLATFORM_AMD__
-if (std::isinf(elt) || std::isnan(elt)) {
-continue;
-}
-amax = fmaxf(amax, fabsf(elt));
-#endif // #ifdef __HIP_PLATFORM_AMD__
}
}
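Aside on the new using-declaration: it is what lets the single unqualified isinf/isnan spelling compile identically on both platforms, so the old #ifdef branches collapse into one body. A minimal host-side sketch of the pattern (illustrative, not the test's actual code):

#include <algorithm>
#include <cmath>
#include <iostream>

// A function-scope using-declaration makes the unqualified isinf/isnan calls
// bind to the std:: overloads on any platform, including where the HIP
// headers also expose global-namespace variants.
float block_amax(const float* data, int n) {
  using std::isnan, std::isinf;  // C++17 multi-name using-declaration
  float amax = 0.0f;
  for (int i = 0; i < n; ++i) {
    const float elt = data[i];
    if (isinf(elt) || isnan(elt)) {
      continue;  // skip non-finite values, as scale_block does
    }
    amax = std::max(amax, std::abs(elt));
  }
  return amax;
}

int main() {
  const float v[] = {1.0f, -3.5f, NAN, INFINITY, 2.0f};
  std::cout << block_amax(v, 5) << "\n";  // prints 3.5
}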

19 changes: 6 additions & 13 deletions tests/cpp/operator/test_cublaslt_gemm.cu
@@ -93,7 +93,7 @@ void compute_ref(
// update ref_d_amax if in fp8
DType dtype = TypeInfo<D_Type>::dtype;
if(isFp8Type(dtype)){
-ref_d_amax = std::max<float>(ref_d_amax, std::fabs(val));
+ref_d_amax = std::max(ref_d_amax, std::fabs(val));
}
}
}
@@ -127,10 +127,8 @@ void compute_mxfp8_ref(
for(size_t kk = 0; kk < k; kk++){
size_t a_idx = transa ? (ii*k + kk) : (kk*m + ii);
size_t b_idx = transb ? (kk*n + jj) : (jj*k + kk);
-float a_scale_inv_val = (float)std::pow(2,
-    a_scale_inv_data[transa ? a_idx/32 : (kk/32 * m + ii)] - 127);
-float b_scale_inv_val = (float)std::pow(2,
-    b_scale_inv_data[transb ? (kk/32 * n + jj) : b_idx/32] - 127);
+float a_scale_inv_val = std::exp2f(a_scale_inv_data[transa ? a_idx/32 : (kk/32 * m + ii)] - 127);
+float b_scale_inv_val = std::exp2f(b_scale_inv_data[transb ? (kk/32 * n + jj) : b_idx/32] - 127);
val += a_scale_inv_val * (float)a_data[a_idx] * b_scale_inv_val * (float)b_data[b_idx];
}
if(bias_data){
@@ -144,7 +142,7 @@
// update ref_d_amax if in fp8
DType dtype = TypeInfo<D_Type>::dtype;
if(isFp8Type(dtype)){
-ref_d_amax = std::max<float>(ref_d_amax, std::fabs(val));
+ref_d_amax = std::max(ref_d_amax, std::fabs(val));
}
}
}
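Aside on the std::exp2f change above: the per-block scale bytes are biased power-of-two exponents (bias 127, as in the code), so each scale decodes as 2^(stored - 127), and std::exp2f computes that directly instead of round-tripping through double-precision std::pow. A minimal sketch; decode_scale is a hypothetical helper, not a function from the tests:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode a stored scale byte: value = 2^(stored - 127).
float decode_scale(uint8_t stored) {
  return std::exp2f(static_cast<int>(stored) - 127);
}

int main() {
  std::printf("%g %g %g\n",
              decode_scale(127),   // 2^0  = 1
              decode_scale(130),   // 2^3  = 8
              decode_scale(120));  // 2^-7 = 0.0078125
}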
@@ -177,16 +175,11 @@ std::pair<double, double> getTestTolerances(const DType type, bool use_fp8, bool
// relax for certain FP8 gemm with hipblaslt
if (use_mxfp8) {
atol = 5e-4;
-/*During hipifying std::max is converted to ::max
-  to w/a HIP bug with using std:: in device functions.
-  W/o explicit <double>, compiler uses non-templated int method variant from HIP headers
-  TODO: remove when switch to new hipify version after fixing HIP bug */
-rtol = std::max<double>(rtol, 1e-3);
+rtol = std::max(rtol, 1e-3);
}
else if (use_fp8) {
atol = 1e-3;
-//TODO: remove <double> (see comment above)
-rtol = std::max<double>(rtol, 5e-3);
+rtol = std::max(rtol, 5e-3);
}
else if (type == DType::kBFloat16) {
//relax for certain prime number TN gemm
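Aside on the deleted workaround comment: a self-contained host-side demo of the overload pitfall it describes. The global max below is a hypothetical stand-in for the non-templated int variant that HIP's headers expose; the real one lives in HIP, not here:

#include <algorithm>
#include <iostream>

// Stand-in for the non-templated int overload in the global namespace.
inline int max(int a, int b) { return a > b ? a : b; }

int main() {
  double rtol = 5e-5;

  // Old hipify rewrote std::max to ::max; overload resolution then picked
  // the int variant, silently truncating both arguments to 0.
  double bad = ::max(rtol, 1e-3);                    // 0, not 0.001

  // The workaround forced the function template with an explicit argument:
  double workaround = std::max<double>(rtol, 1e-3);  // 0.001

  // With NO_MATH_REPLACE the call stays std::max, deduction works, and the
  // explicit <double> can be dropped, as this PR does:
  double now = std::max(rtol, 1e-3);                 // 0.001

  std::cout << bad << " " << workaround << " " << now << "\n";  // 0 0.001 0.001
}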
25 changes: 0 additions & 25 deletions tests/cpp/test_common.cu
@@ -547,22 +547,9 @@ void compareResults_sequential(const std::string &name, const Tensor &test,
const double mean = (t + r) / 2;
const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6);
const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6);
-#ifndef __HIP_PLATFORM_AMD__
const double cast_mean_p = static_cast<double>(static_cast<T>(mean_p));
const double cast_mean_m = static_cast<double>(static_cast<T>(mean_m));
assertion = !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r));
-#else
-const double cast_mean_p =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_p))));
-const double cast_mean_m =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_m))));
-/*During hipifying std::max and std::min are converted to ::max and ::min
-  to w/a HIP bug with using std:: in device functions.
-  W/o explicit <double>, compiler uses non-templated int method variant from HIP headers
-  TODO: remove when switch to new hipify version after fixing HIP bug */
-assertion =
-    !(cast_mean_m == std::min<double>(t, r) && cast_mean_p == std::max<double>(t, r));
-#endif
}
std::string direction = rowwise ? "rowwise" : "columnwise";
ASSERT_FALSE(assertion) << "Error in tensor " << name << " in "
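Aside on the surviving check: it accepts t and r when they are adjacent representable values of the storage type T, by nudging their midpoint by a relative 1e-6 in each direction and rounding each nudge through T; the two round trips must land on min(t, r) and max(t, r). A sketch, assuming a C++23 compiler with <stdfloat>; adjacent_in_T is a hypothetical name:

#include <algorithm>
#include <cstdio>
#include <stdfloat>  // C++23; assumes std::float16_t support (e.g. GCC 13+)

// Accept t and r when they are neighboring representable values of T.
template <typename T>
bool adjacent_in_T(double t, double r) {
  const double mean = (t + r) / 2;
  const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6);
  const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6);
  const double cast_p = static_cast<double>(static_cast<T>(mean_p));
  const double cast_m = static_cast<double>(static_cast<T>(mean_m));
  return cast_m == std::min(t, r) && cast_p == std::max(t, r);
}

int main() {
  // 1.0 and the next FP16 value above it (1 + 2^-10) are neighbors, so the
  // check accepts them; 1.0 vs 1.5 are not neighbors, so it rejects them.
  std::printf("%d %d\n",
              adjacent_in_T<std::float16_t>(1.0, 1.0009765625),   // 1
              adjacent_in_T<std::float16_t>(1.0, 1.5));           // 0
}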
@@ -603,21 +590,9 @@ static size_t getFirstMismatchIdx(const DType data_type, const T* test_data, con
const double mean = (t + r) / 2;
const double mean_p = mean >= 0 ? mean * (1 + 1e-6) : mean * (1 - 1e-6);
const double mean_m = mean >= 0 ? mean * (1 - 1e-6) : mean * (1 + 1e-6);
-#ifndef __HIP_PLATFORM_AMD__
const double cast_mean_p = static_cast<double>(static_cast<T>(mean_p));
const double cast_mean_m = static_cast<double>(static_cast<T>(mean_m));
assertion = !(cast_mean_m == std::min(t, r) && cast_mean_p == std::max(t, r));
-#else
-const double cast_mean_p =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_p))));
-const double cast_mean_m =
-    static_cast<double>(static_cast<float>(static_cast<T>(static_cast<float>(mean_m))));
-/*During hipifying std::max and std::min are converted to ::max and ::min
-  to w/a HIP bug with using std:: in device functions.
-  W/o explicit <double>, compiler uses non-templated int method variant from HIP headers
-  TODO: remove when switch to new hipify version after fixing HIP bug */
-assertion = !(cast_mean_m == std::min<double>(t, r) && cast_mean_p == std::max<double>(t, r));
-#endif
}
if (assertion && i < first_mismatch_idx) {
first_mismatch_idx = i;
1 change: 1 addition & 0 deletions transformer_engine/common/CMakeLists.txt
@@ -239,6 +239,7 @@ else()
IGNORES "*/aotriton/*"
IGNORES "*/ck_fused_attn/*"
CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json"
+NO_MATH_REPLACE
)
get_hipified_list("${transformer_engine_SOURCES}" te_hip_sources)
message("${message_line}")