Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions codegen/compiler/src/Quidditch/Target/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@ iree_cc_library(
HDRS
"Passes.h"
"Passes.h.inc"
"TilingScheme.h"
SRCS
"ConvertToLLVM.cpp"
"ConfigureForSnitch.cpp"
"ConfigureTiles.cpp"
"DisableQuidditchVariant.cpp"
"LinkExecutables.cpp"
"PadToTilingConfig.cpp"
"ReluToMax.cpp"
"RemoveTrivialLoops.cpp"
"TensorTile.cpp"
"TilingScheme.cpp"
DEPS
::PassesIncGen
Quidditch::Conversion::ConvertSnitchToLLVM
Expand Down
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
#include "Passes.h"

#include <sys/wait.h>
#include <unistd.h>
#include "Quidditch/Dialect/Snitch/IR/QuidditchSnitchAttrs.h"
#include "TilingScheme.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "iree/compiler/Codegen/Utils/CPUUtils.h"
#include "iree/compiler/Codegen/Utils/Utils.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

namespace quidditch {
#define GEN_PASS_DEF_CONFIGUREFORSNITCHPASS
#define GEN_PASS_DEF_CONFIGURETILES
#include "Quidditch/Target/Passes.h.inc"
} // namespace quidditch

using namespace mlir;
using namespace mlir::iree_compiler;

namespace {
class ConfigureForSnitch
: public quidditch::impl::ConfigureForSnitchPassBase<ConfigureForSnitch> {

/// Pass implementation derived from the generated `ConfigureTilesBase`.
/// `runOnOperation` is defined out of line.
class ConfigureTiles
    : public quidditch::impl::ConfigureTilesBase<ConfigureTiles> {
public:
  using Base::Base;

  /// Builds the pass from its options.
  ///
  /// `options.tablePointer` carries the address of a `TileInfoTbl` encoded as
  /// an integer; it is converted back to a pointer here. The pointer is only
  /// stored: this class never allocates or frees the table.
  ConfigureTiles(const quidditch::ConfigureTilesOptions &options) {
    // NOTE(review): `importTiles` below presumably shadows the option of the
    // same name declared by the generated base class -- confirm this is
    // intended.
    this->importTiles = options.importTiles;
    // A named cast makes the integer-to-pointer conversion explicit and
    // greppable.
    this->tbl =
        reinterpret_cast<quidditch::TileInfoTbl *>(options.tablePointer);
  }

protected:
  void runOnOperation() override;

private:
  std::string importTiles = "";
  // Null by default so that instances created through the inherited
  // constructors hold a well-defined value instead of an indeterminate
  // pointer.
  quidditch::TileInfoTbl *tbl = nullptr;
};
} // namespace

Expand All @@ -36,62 +49,44 @@ static LogicalResult setTranslationInfo(FunctionOpInterface funcOp) {
IREE::Codegen::DispatchLoweringPassPipeline::None, SymbolRefAttr()));
}

static LogicalResult setRootConfig(FunctionOpInterface funcOp,
Operation *rootOp) {
static LogicalResult
setRootConfig(FunctionOpInterface funcOp, Operation *rootOp,
quidditch::TileInfoTbl *tbl) {
return TypeSwitch<Operation *, LogicalResult>(rootOp)
.Case<linalg::MatmulTransposeBOp>([&](linalg::LinalgOp op) {
// [0]: Always one in our matvec case.

// [1]: How many rows we are processing. Should fit in L1.
// Should be as high as possible for subgroup distribution.
// Should be a multiple of 8 to be further distributed to compute cores.

// [2]: Reduction dimension. How many columns are we
// processing at once? Cannot be distributed but has a few effects:
// * It allows us to make [1] larger by fitting more rows into L1.
// This therefore also gives us more parallelism compute core wise.
// * It makes our workgroups larger, reducing dispatch overhead and
// memory bandwidth (by only needing to copy loop invariant memory
// once + needing to copy back the result fewer times). This could
// come at the cost of concurrency for distributing workgroups but is
// only applicable once on Occamy.
// Assume tiling scheme passed in with --iree-quidditch-import-tiles
SmallVector<int64_t> workgroupTiles(3, 0);
SmallVector<int64_t> l1Tiles(3, 0);
SmallVector<int64_t> l1Interchange = {2, 0, 1};
bool dualBuffer = true;

if (funcOp.getName() ==
"main$async_dispatch_9_matmul_transpose_b_1x161x600_f64") {
l1Tiles[0] = 0;
l1Tiles[1] = 56;
l1Tiles[2] = 100;
}
if (funcOp.getName() ==
"main$async_dispatch_0_matmul_transpose_b_1x400x161_f64") {
l1Tiles[1] = 40;
// TODO: Switch to 82 and true once correctness bugs are fixed.
l1Tiles[2] = 0;
dualBuffer = false;
bool dualBuffer = false;
// if table of tiling schemes is invalid, throw an error
if (tbl == 0) {
funcOp.emitWarning() << "\nConfigureTiles: Table pointer is zero!!";
return failure();
}
if (funcOp.getName() ==
"main$async_dispatch_7_matmul_transpose_b_1x600x400_f64") {
l1Tiles[0] = 0;
l1Tiles[1] = 40;
l1Tiles[2] = 100;

// Look up the tile size, interchange, and double buffering settings
// from table
auto search = tbl->find(funcOp.getName().str());
if (search == tbl->end()) {
funcOp.emitWarning()
<< "\nConfigureTiles: Root operation of this dispatch "
"is a missing tiling scheme";
return failure();
}
if (funcOp.getName() ==
"main$async_dispatch_8_matmul_transpose_b_1x600x600_f64") {
l1Tiles[0] = 0;
l1Tiles[1] = 40;
l1Tiles[2] = 100;
quidditch::TilingScheme &ts = search->second;
if (!ts.getTiles_flat(l1Tiles)) {
funcOp.emitWarning() << "\nConfigureTiles: Found tiling scheme, but "
"couldn't get l1 tile list";
return failure();
}
if (funcOp.getName() ==
"main$async_dispatch_1_matmul_transpose_b_1x1200x400_f64") {
l1Tiles[0] = 0;
l1Tiles[1] = 40;
l1Tiles[2] = 100;
if (!ts.getOrder_flat(l1Interchange)) {
funcOp.emitWarning() << "\nConfigureTiles: Found tiling scheme, but "
"couldn't get l1 interchange";
return failure();
}

dualBuffer = ts.getDualBuffer();
// set lowering config according to info in table
setLoweringConfig(rootOp, quidditch::Snitch::LoweringConfigAttr::get(
rootOp->getContext(), workgroupTiles,
l1Tiles, l1Interchange, dualBuffer));
Expand All @@ -100,35 +95,46 @@ static LogicalResult setRootConfig(FunctionOpInterface funcOp,
.Default(success());
}

void ConfigureForSnitch::runOnOperation() {
void ConfigureTiles::runOnOperation() {
FunctionOpInterface funcOp = getOperation();
if (getTranslationInfo(funcOp))
return;

SmallVector<Operation *> computeOps = getComputeOps(funcOp);
FailureOr<Operation *> rootOp = getRootOperation(computeOps);
if (failed(rootOp))
if (failed(rootOp)) {
return signalPassFailure();
}
Operation *rootOperation = rootOp.value();
if (!rootOperation)
if (!rootOperation) {
return;
}

// Set the same translation info for all functions right now.
// This should move into 'setRootConfig' if we gain different pass pipelines
// for different kernels.
if (failed(setTranslationInfo(funcOp)))
if (failed(setTranslationInfo(funcOp))) {
return signalPassFailure();
}

// Annotate root linalg ops with tile sizes
auto loweringConfig =
getLoweringConfig<quidditch::Snitch::LoweringConfigAttr>(rootOperation);
if (!loweringConfig)
if (failed(setRootConfig(funcOp, rootOperation)))
if (!loweringConfig) {
if (failed(setRootConfig(funcOp, rootOperation, tbl))) {
funcOp.emitWarning()
<< "\nConfigureTiles: set root config failed\n";
return signalPassFailure();
}
}

// The root configuration setting introduces `tensor.dim` operations.
// Resolve those away.
RewritePatternSet patterns(funcOp.getContext());
memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns))))
if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) {
funcOp.emitWarning() << "\nConfigureTiles: apply patterns and "
"fold greedily failed\n";
signalPassFailure();
}
}
16 changes: 16 additions & 0 deletions codegen/compiler/src/Quidditch/Target/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

include "mlir/Pass/PassBase.td"


def LinkExecutablesPass : Pass<"quidditch-link-executables", "mlir::ModuleOp"> {
let description = [{
Combines all `hal.executable.variant`s of the same target into a single
Expand All @@ -29,6 +30,21 @@ def ConfigureForSnitchPass
: InterfacePass<"quidditch-configure-for-snitch",
"mlir::FunctionOpInterface">;

def ConfigureTiles : InterfacePass<"quidditch-configure-tiles", "mlir::FunctionOpInterface"> {
let summary = "Annotate linalg operations with tile sizes";
let description = [{
Within each iree dispatch, annotate the root linalg operation with a tiling scheme (tile sizes + loop interchange).
Caveat: only tiles linalg operations of type matmul_transpose_b (for now)
Set the importTiles option to the path to the json file containing the tiling scheme for each dispatch.
}];
let options = [
Option<"importTiles", "import-tiles", "std::string", /*default=*/"",
"Name of a JSON file specifying loop bounds and order for each root linalg operation.">,
Option<"tablePointer", "NeverPassAValueHere", "std::uintptr_t", /*default=*/"0",
"Avoids opening the input file multiple times. Never pass a value to this option via the command line.">,
Comment on lines +43 to +44
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the user passes in all tiling schemes in a single JSON file, it made sense to me to read that file once, before constructing the ConfigureTiles pass, and populate a lookup table with its contents. This way, the ConfigureTiles pass doesn't have to open and re-read the same file each time it runs over a function op.
I couldn't figure out an easy way to provide another argument to the ConfigureTiles pass without also exposing it to the user. Is it okay to leave it as is, or should I work harder to find a way to remove it as a command-line argument?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I see the advantage. Why not read the file in the pass itself?

];
}

def TensorTilePass : InterfacePass<"quidditch-tensor-tile",
"mlir::FunctionOpInterface"> {
let options = [
Expand Down
26 changes: 25 additions & 1 deletion codegen/compiler/src/Quidditch/Target/QuidditchTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@
#include "LibraryBuilder.h"
#include "Passes.h"

#include "TilingScheme.h"
#include "llvm/Support/ErrorHandling.h"

using namespace mlir;
using namespace mlir::iree_compiler;
using namespace quidditch::Snitch;
Expand Down Expand Up @@ -81,6 +84,9 @@ struct QuidditchTargetOptions {
std::string xDSLOptPath;
std::string toolChainRoot;
bool assertCompiled = false;
std::string importTiles = ""; // added for Configure Tiles Pass
quidditch::TileInfoTbl tileInfo =
quidditch::TileInfoTbl(); // added for Configure Tiles Pass
// TODO: This should actually be 112640 but DMA stack overflows. Ooopsie!
unsigned l1MemoryBytes = 100000;

Expand Down Expand Up @@ -108,6 +114,11 @@ struct QuidditchTargetOptions {
"iree-quidditch-toolchain-root", toolChainRoot, llvm::cl::cat(category),
llvm::cl::desc("Path to the root directory of the Quidditch toolchain "
"(containing the toolchain file)"));
// added for Configure Tiles Pass
binder.opt<std::string>(
"iree-quidditch-import-tiles", importTiles, llvm::cl::cat(category),
llvm::cl::desc(
"Path to a JSON file from which we import tiling schemes"));
binder.opt<bool>(
"iree-quidditch-assert-compiled", assertCompiled,
llvm::cl::cat(category),
Expand Down Expand Up @@ -173,7 +184,20 @@ class QuidditchTargetBackend final : public IREE::HAL::TargetBackend {
}
modulePassManager.addPass(createMaterializeUserConfigsPass());
FunctionLikeNest funcPassManager(modulePassManager);
funcPassManager.addPass(quidditch::createConfigureForSnitchPass);

// import any manually supplied tile sizes
if (targetOptions.importTiles != "") {
std::string errs;
quidditch::fillTileInfoTable(&targetOptions.tileInfo,
targetOptions.importTiles, errs);
}
Comment on lines +188 to +193
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import tiling schemes and populate the table before creating the ConfigureTiles pass


// automatically tile the dispatches
funcPassManager.addPass([&] {
auto thePass = quidditch::createConfigureTiles(
{targetOptions.importTiles, (std::uintptr_t)&targetOptions.tileInfo});
return thePass;
});
}

void buildTranslationPassPipeline(IREE::HAL::ExecutableTargetAttr targetAttr,
Expand Down
Loading
Loading