Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Since compiler generated stick/unstick is default on, change new option to disable it #3073

Merged
Merged
12 changes: 6 additions & 6 deletions src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick(

// Command-line switches controlling whether the compiler generates
// stick/unstick code itself. Two definitions are shown: the
// `enable-compiler-stick-unstick` flag, initialized to true, and the
// `disable-compiler-stick-unstick` flag, initialized to false, which expresses
// the same default through an opt-out switch.
// Enabled by default now. It could also be enabled only when parallel is on,
// as parallel stick/unstick is quite a bit faster than sequential.
llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick(
"enable-compiler-stick-unstick",
llvm::cl::desc("[Experimental feature] Enable the compiler generate some "
"stick/unstick code. Default is true."),
llvm::cl::init(true), llvm::cl::cat(OnnxMlirCommonOptions));
llvm::cl::opt<bool> nnpaDisableCompilerStickUnstick(
"disable-compiler-stick-unstick",
llvm::cl::desc("Disable the compiler to generate some "
"stick/unstick code. Default is false."),
llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));

llvm::cl::opt<bool> nnpaEnableScalarBcastBinary(
"nnpa-enable-scalar-bcast-binary",
Expand Down Expand Up @@ -94,7 +94,7 @@ llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic{

// `nnpa-saturation`: boolean option, false by default, registered in the
// OnnxMlirCommonOptions category.
// The description is built from adjacent string literals, which the compiler
// concatenates verbatim; every sentence therefore carries its own trailing
// space so the help text does not run sentences together.
llvm::cl::opt<bool> nnpaEnableSaturation("nnpa-saturation",
llvm::cl::desc("Enable saturating f32 values before stickify them. "
"This option turns enable-compiler-stick-unstick on. "
"This option turns off disable-compiler-stick-unstick. "
"Default is false."),
llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));

Expand Down
2 changes: 1 addition & 1 deletion src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ extern llvm::cl::OptionCategory OnnxMlirCommonOptions;
extern llvm::cl::opt<onnx_mlir::NNPAEmissionTargetType> nnpaEmissionTarget;
extern llvm::cl::opt<bool> nnpaDisableZHighToOnnx;
extern llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick;
extern llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick;
extern llvm::cl::opt<bool> nnpaDisableCompilerStickUnstick;
extern llvm::cl::opt<bool> nnpaEnableScalarBcastBinary;
extern llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic;
extern llvm::cl::opt<bool> profileZHighIR;
Expand Down
6 changes: 3 additions & 3 deletions src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void configurePassesNNPA() {
// z16 does not support hardware saturation.
// So, force the use of compiler-generated sticks.
if (nnpaEnableSaturation && isLessEqualNNPALevel(NNPALevel::M14))
nnpaEnableCompilerStickUnstick = true;
nnpaDisableCompilerStickUnstick = false;

// Configure ONNXToZHighLoweringPass.
bool isDynQuant = !nnpaQuantDynamic.empty();
Expand Down Expand Up @@ -272,7 +272,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
pm.addPass(zlow::createZLowRewritePass());
// Late generation of code for stick/unstick; it needs to come after a
// ZLowRewrite pass.
if (nnpaEnableCompilerStickUnstick)
if (!nnpaDisableCompilerStickUnstick)
pm.addPass(zlow::createZLowStickExpansionPass(enableParallel));
pm.addPass(mlir::createCanonicalizerPass());
// Normalize MemRefs.
Expand All @@ -284,7 +284,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
pm.addPass(zlow::createZLowRewritePass());
// The createZLowStickExpansion pass may create parallel constructs,
// they need to be handled here.
if (nnpaEnableCompilerStickUnstick && enableParallel)
if (!nnpaDisableCompilerStickUnstick && enableParallel)
pm.addPass(mlir::createConvertSCFToOpenMPPass());

pm.addPass(mlir::createCanonicalizerPass());
Expand Down
6 changes: 3 additions & 3 deletions src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ struct ZHighToZLowStickOpLowering : public ConversionPattern {
Value alloc = insertAllocForZMemRef(
zMemRefType, shapeHelper.getOutputDims(), op, rewriter);
if (isNHWCLayout(layout)) {
if (nnpaEnableCompilerStickUnstick) {
if (!nnpaDisableCompilerStickUnstick) {
// Compiler-generated stick does not support NCHW yet.
// Explicitly transpose NCHW to NHWC.
input = create.onnx.toMemref(
Expand Down Expand Up @@ -818,7 +818,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern {
// Allocate a buffer for the result MemRef.
Value alloc = nullptr;
if (isNHWCLayout(layout)) {
if (nnpaEnableCompilerStickUnstick) {
if (!nnpaDisableCompilerStickUnstick) {
// Compiler-generated unstick does not support NCHW yet.
// This code allocates an NHWC buffer. It gets dims from the NCHW input.
SmallVector<IndexExpr> dimList;
Expand All @@ -845,7 +845,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern {

// Emit a ZLow operation.
rewriter.create<ZLowUnstickOp>(loc, input, alloc, layout);
if (isNHWCLayout(layout) && nnpaEnableCompilerStickUnstick)
if (isNHWCLayout(layout) && !nnpaDisableCompilerStickUnstick)
// Compiler-generated unstick does not support NCHW yet.
// Explicitly transpose NHWC to NCHW.
alloc =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=true --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s

func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> {
%0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s

func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> {
%0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s

func.func @should_lower_to_zlow_1d(%arg0: tensor<7xf32>) -> tensor<*xf16> {
%0 = "zhigh.Stick"(%arg0) {layout = "1D"} : (tensor<7xf32>) -> tensor<*xf16>
Expand Down
2 changes: 1 addition & 1 deletion test/mlir/accelerators/nnpa/driver/ccfd.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}

// COM: This test is to check regression on the IBM CCFD model.
// COM: We expect that there is only one zlow.stick for the input and one zlow.unstick for the output.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s

// -----

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s

// -----

Expand Down
4 changes: 2 additions & 2 deletions test/mlir/accelerators/nnpa/driver/saturation.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitZLowIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=ZLOW_ON %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=false --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_OFF %s
// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=true --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_ON %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s

// COM: for each case, check saturation ON and OFF.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR %s | FileCheck %s
// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR %s | FileCheck %s

// Check whether the compiler can remove unstick/stick so that the output of zdnn softmax is passed directly to zdnn matmul.
func.func @softmax_matmul(%arg0: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
Expand Down
Loading