diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp index 2461e39ea1..e4332c4174 100644 --- a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp +++ b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp @@ -49,11 +49,11 @@ llvm::cl::opt nnpaEnableZHighDecomposeStickUnstick( // Enabled default now, could also enable it only if parallel is on as parallel // stick/unstick is quite a bit faster than sequential. -llvm::cl::opt nnpaEnableCompilerStickUnstick( - "enable-compiler-stick-unstick", - llvm::cl::desc("[Experimental feature] Enable the compiler generate some " - "stick/unstick code. Default is true."), - llvm::cl::init(true), llvm::cl::cat(OnnxMlirCommonOptions)); +llvm::cl::opt nnpaDisableCompilerStickUnstick( + "disable-compiler-stick-unstick", + llvm::cl::desc("Disable the compiler to generate some " + "stick/unstick code. Default is false."), + llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions)); llvm::cl::opt nnpaEnableScalarBcastBinary( "nnpa-enable-scalar-bcast-binary", @@ -94,7 +94,7 @@ llvm::cl::opt nnpaPlacementHeuristic{ llvm::cl::opt nnpaEnableSaturation("nnpa-saturation", llvm::cl::desc("Enable saturating f32 values before stickify them." - "This option turns enable-compiler-stick-unstick on." + "This option turns off disable-compiler-stick-unstick." "Default is false."), llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions)); diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp index 3958cc2bd5..545f7e5a8a 100644 --- a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp +++ b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp @@ -69,7 +69,7 @@ extern llvm::cl::OptionCategory OnnxMlirCommonOptions; extern llvm::cl::opt nnpaEmissionTarget; extern llvm::cl::opt nnpaDisableZHighToOnnx; extern llvm::cl::opt nnpaEnableZHighDecomposeStickUnstick; -extern llvm::cl::opt nnpaEnableCompilerStickUnstick; +extern llvm::cl::opt nnpaDisableCompilerStickUnstick; extern llvm::cl::opt nnpaEnableScalarBcastBinary; extern llvm::cl::opt nnpaPlacementHeuristic; extern llvm::cl::opt profileZHighIR; diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp index 1d52d60700..cb9e46a300 100644 --- a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp +++ b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp @@ -52,7 +52,7 @@ void configurePassesNNPA() { // z16 does not support for hardware saturation. // So, force its usage to compiler generated sticks. if (nnpaEnableSaturation && isLessEqualNNPALevel(NNPALevel::M14)) - nnpaEnableCompilerStickUnstick = true; + nnpaDisableCompilerStickUnstick = false; // Configure ONNXToZHighLoweringPass. bool isDynQuant = !nnpaQuantDynamic.empty(); @@ -272,7 +272,7 @@ void addPassesNNPA(mlir::OwningOpRef &module, pm.addPass(zlow::createZLowRewritePass()); // Late generation of code for stick/unstick, needed to be after a // ZLowRewrite pass. - if (nnpaEnableCompilerStickUnstick) + if (!nnpaDisableCompilerStickUnstick) pm.addPass(zlow::createZLowStickExpansionPass(enableParallel)); pm.addPass(mlir::createCanonicalizerPass()); // Normalize MemRefs. @@ -284,7 +284,7 @@ void addPassesNNPA(mlir::OwningOpRef &module, pm.addPass(zlow::createZLowRewritePass()); // The createZLowStickExpansion pass may create parallel constructs, // they need to be handled here. - if (nnpaEnableCompilerStickUnstick && enableParallel) + if (!nnpaDisableCompilerStickUnstick && enableParallel) pm.addPass(mlir::createConvertSCFToOpenMPPass()); pm.addPass(mlir::createCanonicalizerPass()); diff --git a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp index 1ed8f6e85b..ad1c112c6f 100644 --- a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp +++ b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp @@ -561,7 +561,7 @@ struct ZHighToZLowStickOpLowering : public ConversionPattern { Value alloc = insertAllocForZMemRef( zMemRefType, shapeHelper.getOutputDims(), op, rewriter); if (isNHWCLayout(layout)) { - if (nnpaEnableCompilerStickUnstick) { + if (!nnpaDisableCompilerStickUnstick) { // Compiler-generated stick hasn't supported NCHW yet. // Explicitly transpose NCHW to NHWC. input = create.onnx.toMemref( @@ -818,7 +818,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern { // Allocate a buffer for the result MemRef. Value alloc = nullptr; if (isNHWCLayout(layout)) { - if (nnpaEnableCompilerStickUnstick) { + if (!nnpaDisableCompilerStickUnstick) { // Compiler-generated unstick hasn't supported NCHW yet. // This code allocates a NHWC buffer. It gets dims from the NCHW input. SmallVector dimList; @@ -845,7 +845,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern { // Emit a ZLow operation. rewriter.create(loc, input, alloc, layout); - if (isNHWCLayout(layout) && nnpaEnableCompilerStickUnstick) + if (isNHWCLayout(layout) && !nnpaDisableCompilerStickUnstick) // Compiler-generated unstick hasn't supported NCHW yet. // Explicitly transpose NHWC to NCHW. alloc = diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/compiler-stick-unstick.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/compiler-stick-unstick.mlir index 270940f2b6..59699c5741 100644 --- a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/compiler-stick-unstick.mlir +++ b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/compiler-stick-unstick.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=true --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> { %0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16> diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/stick-unstick.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/stick-unstick.mlir index 9696f5afc0..ee651df32b 100644 --- a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/stick-unstick.mlir +++ b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/stick-unstick.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> { %0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16> diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/test-datalayout.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/test-datalayout.mlir index be231f26e9..8f9e21165c 100644 --- a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/test-datalayout.mlir +++ b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/test-datalayout.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s func.func @should_lower_to_zlow_1d(%arg0: tensor<7xf32>) -> tensor<*xf16> { %0 = "zhigh.Stick"(%arg0) {layout = "1D"} : (tensor<7xf32>) -> tensor<*xf16> diff --git a/test/mlir/accelerators/nnpa/driver/ccfd.mlir b/test/mlir/accelerators/nnpa/driver/ccfd.mlir index a6aa1de5b0..c522bed319 100644 --- a/test/mlir/accelerators/nnpa/driver/ccfd.mlir +++ b/test/mlir/accelerators/nnpa/driver/ccfd.mlir @@ -1,4 +1,4 @@ -// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd} +// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd} // COM: This test is to check regression on the IBM CCFD model. // COM: We expect that there are only one zlow.stick for the input and one zlow.unstick for the output. diff --git a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir index 9c24e93acb..a72bf5d096 100644 --- a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir +++ b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s +// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s // ----- diff --git a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir index a369f1289e..877a46aa7b 100644 --- a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir +++ b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s +// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s // ----- diff --git a/test/mlir/accelerators/nnpa/driver/saturation.mlir b/test/mlir/accelerators/nnpa/driver/saturation.mlir index 0245786f20..258ca31913 100644 --- a/test/mlir/accelerators/nnpa/driver/saturation.mlir +++ b/test/mlir/accelerators/nnpa/driver/saturation.mlir @@ -4,8 +4,8 @@ // RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitZLowIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=ZLOW_ON %s // RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=false --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_OFF %s // RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=true --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_ON %s -// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s -// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s +// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s +// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s // COM: for each case, check saturation ON and OFF. diff --git a/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir b/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir index a685699191..de72769b2f 100644 --- a/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir +++ b/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR %s | FileCheck %s +// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR %s | FileCheck %s // Check whether the compiler can remove unstick/stick so that the output of zdnn softmax is passed directly to zdnn matmul. func.func @softmax_matmul(%arg0: tensor) -> tensor {