Skip to content

Commit

Permalink
since enable-compiler-stick-unstick is now default on, new option to …
Browse files Browse the repository at this point in the history
…turn it off is disable-compiler-stick-unstick

Signed-off-by: Alexandre Eichenberger <[email protected]>
  • Loading branch information
AlexandreEichenberger committed Feb 12, 2025
1 parent bd52017 commit 645c209
Show file tree
Hide file tree
Showing 12 changed files with 22 additions and 22 deletions.
12 changes: 6 additions & 6 deletions src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick(

// Enabled by default now; could also enable it only if parallel is on, as
// parallel stick/unstick is quite a bit faster than sequential.
llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick(
"enable-compiler-stick-unstick",
llvm::cl::desc("[Experimental feature] Enable the compiler generate some "
"stick/unstick code. Default is true."),
llvm::cl::init(true), llvm::cl::cat(OnnxMlirCommonOptions));
llvm::cl::opt<bool> nnpaDisableCompilerStickUnstick(
"disable-compiler-stick-unstick",
llvm::cl::desc("Disable the compiler to generate some "
"stick/unstick code. Default is false."),
llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));

llvm::cl::opt<bool> nnpaEnableScalarBcastBinary(
"nnpa-enable-scalar-bcast-binary",
Expand Down Expand Up @@ -101,7 +101,7 @@ llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic{

llvm::cl::opt<bool> nnpaEnableSaturation("nnpa-saturation",
llvm::cl::desc("Enable saturating f32 values before stickify them."
"This option turns enable-compiler-stick-unstick on."
"This option turns off disable-compiler-stick-unstick."
"Default is false."),
llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));

Expand Down
2 changes: 1 addition & 1 deletion src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ extern llvm::cl::opt<onnx_mlir::NNPAEmissionTargetType> nnpaEmissionTarget;
extern llvm::cl::opt<bool> nnpaClipToDLFloatRange;
extern llvm::cl::opt<bool> nnpaEnableZHighToOnnx;
extern llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick;
extern llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick;
extern llvm::cl::opt<bool> nnpaDisableCompilerStickUnstick;
extern llvm::cl::opt<bool> nnpaEnableScalarBcastBinary;
extern llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic;
extern llvm::cl::opt<bool> profileZHighIR;
Expand Down
6 changes: 3 additions & 3 deletions src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void configurePassesNNPA() {
// z16 does not support hardware saturation,
// so force the use of compiler-generated sticks.
if (nnpaEnableSaturation && isLessEqualNNPALevel(NNPALevel::M14))
nnpaEnableCompilerStickUnstick = true;
nnpaDisableCompilerStickUnstick = false;

// Configure ONNXToZHighLoweringPass.
bool isDynQuant = !nnpaQuantDynamic.empty();
Expand Down Expand Up @@ -282,7 +282,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
pm.addPass(zlow::createZLowRewritePass());
// Late generation of code for stick/unstick, needed to be after a
// ZLowRewrite pass.
if (nnpaEnableCompilerStickUnstick)
if (!nnpaDisableCompilerStickUnstick)
pm.addPass(zlow::createZLowStickExpansionPass(enableParallel));
pm.addPass(mlir::createCanonicalizerPass());
// Normalize MemRefs.
Expand All @@ -294,7 +294,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
pm.addPass(zlow::createZLowRewritePass());
// The createZLowStickExpansion pass may create parallel constructs,
// they need to be handled here.
if (nnpaEnableCompilerStickUnstick && enableParallel)
if (!nnpaDisableCompilerStickUnstick && enableParallel)
pm.addPass(mlir::createConvertSCFToOpenMPPass());

pm.addPass(mlir::createCanonicalizerPass());
Expand Down
6 changes: 3 additions & 3 deletions src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ struct ZHighToZLowStickOpLowering : public ConversionPattern {
Value alloc = insertAllocForZMemRef(
zMemRefType, shapeHelper.getOutputDims(), op, rewriter);
if (isNHWCLayout(layout)) {
if (nnpaEnableCompilerStickUnstick) {
if (!nnpaDisableCompilerStickUnstick) {
// Compiler-generated stick hasn't supported NCHW yet.
// Explicitly transpose NCHW to NHWC.
input = create.onnx.toMemref(
Expand Down Expand Up @@ -818,7 +818,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern {
// Allocate a buffer for the result MemRef.
Value alloc = nullptr;
if (isNHWCLayout(layout)) {
if (nnpaEnableCompilerStickUnstick) {
if (!nnpaDisableCompilerStickUnstick) {
// Compiler-generated unstick hasn't supported NCHW yet.
// This code allocates a NHWC buffer. It gets dims from the NCHW input.
SmallVector<IndexExpr> dimList;
Expand All @@ -845,7 +845,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern {

// Emit a ZLow operation.
rewriter.create<ZLowUnstickOp>(loc, input, alloc, layout);
if (isNHWCLayout(layout) && nnpaEnableCompilerStickUnstick)
if (isNHWCLayout(layout) && !nnpaDisableCompilerStickUnstick)
// Compiler-generated unstick hasn't supported NCHW yet.
// Explicitly transpose NHWC to NCHW.
alloc =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=true --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s

func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> {
%0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s

func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> {
%0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s

func.func @should_lower_to_zlow_1d(%arg0: tensor<7xf32>) -> tensor<*xf16> {
%0 = "zhigh.Stick"(%arg0) {layout = "1D"} : (tensor<7xf32>) -> tensor<*xf16>
Expand Down
2 changes: 1 addition & 1 deletion test/mlir/accelerators/nnpa/driver/ccfd.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}

// COM: This test is to check regression on the IBM CCFD model.
// COM: We expect that there is only one zlow.stick for the input and one zlow.unstick for the output.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s

// -----

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s

// -----

Expand Down
4 changes: 2 additions & 2 deletions test/mlir/accelerators/nnpa/driver/saturation.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitZLowIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=ZLOW_ON %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=false --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_OFF %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=true --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_ON %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s

// COM: for each case, check saturation ON and OFF.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR %s | FileCheck %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR %s | FileCheck %s

// Check whether the compiler can remove unstick/stick so that the output of zdnn softmax is passed directly to zdnn matmul.
func.func @softmax_matmul(%arg0: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
Expand Down

0 comments on commit 645c209

Please sign in to comment.