Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mlir/include/mlir-c/Dialect/RockEnums.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ extern "C" {
/// Kind of tuning parameter set, as accepted by mlirRockTuningSpaceCreate.
///
/// NOTE(review): Exhaustive used to be 2; adding Greedy = 2 moves Exhaustive
/// to 3, so C API clients built against the previous header will now select
/// Greedy when they pass 2 — confirm this renumbering is intended.
enum RocmlirTuningParamSetKind {
  RocmlirTuningParamSetKindQuick = 0,
  RocmlirTuningParamSetKindFull = 1,
  RocmlirTuningParamSetKindGreedy = 2,
  RocmlirTuningParamSetKindExhaustive = 3
};
typedef enum RocmlirTuningParamSetKind RocmlirTuningParamSetKind;

Expand Down
1 change: 1 addition & 0 deletions mlir/include/mlir/Dialect/Rock/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ namespace rock {
#define GEN_PASS_DECL_ROCKADDASYNCWAITPASS
#define GEN_PASS_DECL_ROCKADDDIRECTTOLDSALIASINFOPASS
#define GEN_PASS_DECL_CONVERTROCKOPSTOROCDLOPS
#define GEN_PASS_DECL_ROCKADDSCHEDGROUPBARRIERSPASS

// The GEN_PASS_DECL_* macros above and GEN_PASS_REGISTRATION are defined
// before the generated Passes.h.inc is included.
// NOTE(review): presumably each macro enables the matching section of the
// generated header (per-pass declarations / pass registration helpers) —
// confirm against the MLIR declarative pass documentation.
#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/Rock/Passes.h.inc"
Expand Down
20 changes: 20 additions & 0 deletions mlir/include/mlir/Dialect/Rock/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,26 @@ def RockBufferLoadMergePass : Pass<"rock-buffer-load-merge", "::mlir::func::Func
let dependentDialects = ["::mlir::amdgpu::AMDGPUDialect"];
}

// Function-level pass, registered as "rock-add-sched-group-barriers", that
// counts memory and MFMA operations in scf.for loops and inserts scheduling
// group barriers based on those counts.
def RockAddSchedGroupBarriersPass : Pass<"rock-add-sched-group-barriers", "::mlir::func::FuncOp"> {
  let summary = "Analyze scf.for loops and insert scheduling group barriers";
  let description = [{
    This pass analyzes scf.for operations and counts, per iteration, the
    following kinds of operations:

    - Global memory loads (amdgpu.raw_buffer_load, vector.load from global memory)
    - LDS/workgroup memory reads (memref.load from workgroup address space)
    - LDS/workgroup memory writes (memref.store to workgroup address space)
    - MFMA instructions (amdgpu.mfma)

    The counts factor in affine.for loop trip counts to give the total number of
    operations per scf.for iteration. Based on these counts, scheduling group
    barriers (ROCDL::SchedGroupBarrier) are inserted to optimize instruction
    scheduling on AMD GPUs.
  }];
  let dependentDialects = ["::mlir::amdgpu::AMDGPUDialect", "::mlir::scf::SCFDialect",
                           "::mlir::affine::AffineDialect", "::mlir::gpu::GPUDialect",
                           "::mlir::ROCDL::ROCDLDialect"];
}

def RockTransformToMemrefPass : Pass<"rock-transform-to-memref", "::mlir::func::FuncOp"> {
let summary = "convert remaining rock.transform ops to memref.expand/collapse_shape";
let dependentDialects = ["rock::RockDialect", "affine::AffineDialect", "gpu::GPUDialect", "vector::VectorDialect", "memref::MemRefDialect"];
Expand Down
3 changes: 3 additions & 0 deletions mlir/lib/CAPI/Dialect/Rock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ mlirRockTuningSpaceCreate(MlirModule module, RocmlirTuningParamSetKind kind) {
case RocmlirTuningParamSetKindExhaustive:
ourKind = rock::TuningParamSetKind::Exhaustive;
break;
case RocmlirTuningParamSetKindGreedy:
ourKind = rock::TuningParamSetKind::Greedy;
break;
}
auto mod = unwrap(module);
rock::TuningParamSpaceSettings settings;
Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ void rock::buildKernelPipeline(OpPassManager &pm,
funcPm.addPass(
math::createMathExtendToSupportedTypes(extendToLLVMTypesOptions));
funcPm.addPass(rock::createRockBufferLoadMergePass());
funcPm.addPass(rock::createRockAddSchedGroupBarriersPass());
funcPm.addPass(rock::createRockTransformToMemrefPass());
funcPm.addPass(rock::createRockEmulateNarrowTypePass());
funcPm.addPass(rock::createRockPack4BitGpuOpsTo8BitPass());
Expand Down
Loading