Merge main and fix conflicts
adrianlizarraga committed Jan 18, 2025
2 parents 75afaa6 + a9bf0be commit c064401
Showing 24 changed files with 415 additions and 849 deletions.
342 changes: 281 additions & 61 deletions onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc

Large diffs are not rendered by default.

103 changes: 0 additions & 103 deletions onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -16,11 +16,6 @@ class SimpleOpBuilder : public BaseOpBuilder {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SimpleOpBuilder);

protected:
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
std::vector<std::string>& input_names,
bool do_op_validation) const override ORT_MUST_USE_RESULT;
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
@@ -47,91 +42,6 @@ class SimpleOpBuilder : public BaseOpBuilder {
static constexpr std::array<std::string_view, 3> gridsample_supported_padding_modes = {"zeros", "border", "reflection"};
};

// Move to qnn_utils if it's re-usable
Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper,
const std::string& convert_input_name,
const std::string& convert_output_name,
Qnn_DataType_t input_qnn_data_type,
Qnn_DataType_t output_qnn_data_type,
int32_t input_offset,
float input_scale,
const std::vector<uint32_t>& output_shape,
bool do_op_validation) {
// Assume input is already handled.
float qmin = 0.0f;
float qmax = 255.0f;
ORT_RETURN_IF_ERROR(qnn::utils::GetQminQmax(input_qnn_data_type, qmin, qmax));
double value_min = qnn::utils::Dequantize(input_offset, input_scale, qmin);
double value_max = qnn::utils::Dequantize(input_offset, input_scale, qmax);
float scale = 0.0f;
int32_t offset = 0;
ORT_RETURN_IF_ERROR(qnn::utils::GetQuantParams(static_cast<float>(value_min),
static_cast<float>(value_max),
output_qnn_data_type,
scale,
offset));

std::vector<uint32_t> output_shape_copy = output_shape;
QnnTensorWrapper convert_output_tensorwrapper(convert_output_name,
QNN_TENSOR_TYPE_NATIVE,
output_qnn_data_type,
QnnQuantParamsWrapper(scale, offset),
std::move(output_shape_copy));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(convert_output_tensorwrapper)), "Failed to add tensor.");

ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(convert_output_name,
QNN_OP_PACKAGE_NAME_QTI_AISW,
"Convert",
{convert_input_name},
{convert_output_name},
{},
do_op_validation),
"Failed to add node.");
return Status::OK();
}
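
For intuition, the conversion above picks uint8 quantization parameters that span the same real-valued range the uint16 input can represent, so the inserted Convert loses only precision, not range. Below is a minimal standalone sketch of that derivation; the DequantizeLevel and Uint16ToUint8Params helpers are hypothetical and use the common value = scale * (q - zero_point) convention, whereas the qnn::utils helpers above may follow QNN's own offset sign convention.

#include <cstdint>
#include <cstdio>
#include <utility>

// Hypothetical helper: real value represented by quantized level q under the
// convention value = scale * (q - zero_point).
static double DequantizeLevel(double q, double scale, double zero_point) {
  return scale * (q - zero_point);
}

// Derive uint8 (0..255) quantization parameters that cover the same real range
// as the given uint16 (0..65535) parameters.
static std::pair<float, int32_t> Uint16ToUint8Params(float in_scale, int32_t in_zero_point) {
  const double value_min = DequantizeLevel(0.0, in_scale, in_zero_point);
  const double value_max = DequantizeLevel(65535.0, in_scale, in_zero_point);
  const float out_scale = static_cast<float>((value_max - value_min) / 255.0);
  // Choose the zero point so that value_min maps to quantized level 0.
  const int32_t out_zero_point = static_cast<int32_t>(-value_min / out_scale + 0.5);
  return {out_scale, out_zero_point};
}

int main() {
  // Example: a uint16 activation with scale 1e-4 and zero point 32768.
  const auto [scale, zero_point] = Uint16ToUint8Params(1e-4f, 32768);
  std::printf("uint8 scale=%g zero_point=%d\n", scale, zero_point);
  return 0;
}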

Status SimpleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
std::vector<std::string>& input_names,
bool do_op_validation) const {
const std::string& op_type = node_unit.OpType();
ORT_RETURN_IF_ERROR(BaseOpBuilder::ProcessInputs(qnn_model_wrapper, node_unit, logger, input_names, do_op_validation));

if (op_type == "MatMul") {
const auto& inputs = node_unit.Inputs();
TensorInfo input0_info = {};
TensorInfo input1_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input1_info));
// Need to insert a Convert op if both inputs are dynamic and are ufixed_16
if (!input0_info.is_initializer && !input1_info.is_initializer &&
input0_info.qnn_data_type == input1_info.qnn_data_type &&
input0_info.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) {
ORT_RETURN_IF_NOT(input1_info.quant_param.IsPerTensor(),
"MatMul's activation inputs only support per-tensor quantization");
const Qnn_QuantizeParams_t& quant_param = input1_info.quant_param.Get();
// insert Convert op after input1
std::string convert_input_name = input_names.back();
input_names.pop_back();
const std::string& matmul_output_name = node_unit.Outputs()[0].node_arg.Name();
std::string convert_output_name = convert_input_name + "_convert_" + matmul_output_name;
ORT_RETURN_IF_ERROR(InsertConvertOp(qnn_model_wrapper,
convert_input_name,
convert_output_name,
input1_info.qnn_data_type,
QNN_DATATYPE_UFIXED_POINT_8,
quant_param.scaleOffsetEncoding.offset,
quant_param.scaleOffsetEncoding.scale,
input1_info.shape,
do_op_validation));
input_names.push_back(convert_output_name);
}
}

return Status::OK();
}

Status SimpleOpBuilder::ExplicitOpCheck(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit) const {
const std::string& op_type = node_unit.OpType();
@@ -373,19 +283,6 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
ORT_RETURN_IF(norm_p_order != 2, "QNN EP only supports LpNormalization with 'p' attribute equal to 2.");
}

if (op_type == "MatMul") {
Qnn_Scalar_t scalar_param = QNN_SCALAR_INIT;
scalar_param.dataType = QNN_DATATYPE_BOOL_8;
scalar_param.bool8Value = 0;
QnnParamWrapper transpose_in0_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN0, scalar_param);
param_tensor_names.push_back(transpose_in0_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(transpose_in0_param));

QnnParamWrapper transpose_in1_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar_param);
param_tensor_names.push_back(transpose_in1_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(transpose_in1_param));
}

if (op_type == "LeakyRelu") {
std::string input_name = "alpha";
ORT_RETURN_IF_ERROR(ProcessAlphaAttributeAsInput(qnn_model_wrapper, node_unit, input_name));
16 changes: 10 additions & 6 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -1093,35 +1093,39 @@ Status QnnBackendManager::TerminateQnnLog() {
}

void QnnBackendManager::ReleaseResources() {
if (!backend_setup_completed_) {
return;
}

auto result = ReleaseContext();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext.";
}

result = ReleaseProfilehandle();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle.";
}

result = ReleaseDevice();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice.";
}

result = ShutdownBackend();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend.";
}

result = TerminateQnnLog();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog.";
}

if (backend_lib_handle_) {
result = UnloadLib(backend_lib_handle_);
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to unload backend library: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to unload backend library.";
}
}

14 changes: 14 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
@@ -74,6 +74,20 @@ Status QnnModelWrapper::MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensor
return Status::OK();
}

Status QnnModelWrapper::MakeTensorWrapper(const TensorInfo& tensor_info,
const std::string& tensor_name,
QnnTensorWrapper& tensor_wrapper) const {
std::vector<uint8_t> unpacked_tensor;
if (tensor_info.is_initializer) {
ORT_RETURN_IF_ERROR(UnpackInitializerData(*tensor_info.initializer_tensor, unpacked_tensor));
}

tensor_wrapper = QnnTensorWrapper(tensor_name, GetTensorType(tensor_name), tensor_info.qnn_data_type,
tensor_info.quant_param.Copy(), std::vector<uint32_t>(tensor_info.shape),
std::move(unpacked_tensor));
return Status::OK();
}

bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) {
// Keep a copy of the tensor name since it will be moved with the wrapper into model_tensors_map_
std::string tensor_name = tensor_wrapper.GetName();
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
@@ -63,6 +63,9 @@ class QnnModelWrapper {

// Make a QnnTensorWrapper from an onnx input or output.
Status MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensorWrapper& tensor_wrapper) const;
Status MakeTensorWrapper(const TensorInfo& tensor_info,
const std::string& tensor_name,
QnnTensorWrapper& tensor_wrapper) const;

// Add to internal tensor wrapper table
bool AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper);
57 changes: 56 additions & 1 deletion onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -290,12 +290,67 @@ TEST_F(QnnHTPBackendTests, MatMulOp_QDQ) {
RunQDQPerChannelMatMulOpTest<uint16_t, Int4x2, uint16_t>({2, 3, 3, 3}, {3, 2}, -1, QDQTolerance(),
ExpectedEPNodeAssignment::All, 18, true);

// // UINT16, per-channel INT8 weight
// UINT16, per-channel INT8 weight
RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3}, {3, 2}, 1, QDQTolerance(),
ExpectedEPNodeAssignment::All, 21, false, false);
RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3, 3}, {3}, -1, QDQTolerance(0.0041f));
}

// Tests MatMul with two uint16 (quantized) inputs that are both dynamic.
// This exercises a workaround in QNN EP that inserts a QNN Convert op before input[1] (converting it from uint16 to uint8).
// The workaround prevents a validation error for this specific MatMul configuration.
// The specific shapes and input ranges (quantization params) come from a customer model.
TEST_F(QnnHTPBackendTests, MatMulOp_QDQ_Regression_uint16_dynamic_inputs) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif

// Test with rank 4 inputs
{
std::vector<int64_t> shape_0 = {1, 12, 512, 96};
TestInputDef<float> input0_def(
{1, 12, 512, 96}, false,
GetFloatDataInRange(-5.087f, 4.992f,
static_cast<size_t>(std::accumulate(shape_0.begin(), shape_0.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));
std::vector<int64_t> shape_1 = {1, 12, 96, 512};
TestInputDef<float> input1_def(
shape_1, false,
GetFloatDataInRange(-6.772f, 7.258f,
static_cast<size_t>(std::accumulate(shape_1.begin(), shape_1.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));

TestQDQModelAccuracy(
BuildMatMulOpTestCase(input0_def, input1_def),
BuildMatMulOpQDQTestCase<uint16_t, uint16_t, uint16_t>(input0_def, input1_def, false),
provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance());
}

// Test with input[1] as rank 1
{
std::vector<int64_t> shape_0 = {1, 12, 512, 96};
TestInputDef<float> input0_def(
{1, 12, 512, 96}, false,
GetFloatDataInRange(-5.087f, 4.992f,
static_cast<size_t>(std::accumulate(shape_0.begin(), shape_0.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));
std::vector<int64_t> shape_1 = {96};
TestInputDef<float> input1_def(
shape_1, false,
GetFloatDataInRange(-6.772f, 7.258f,
static_cast<size_t>(std::accumulate(shape_1.begin(), shape_1.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));

TestQDQModelAccuracy(
BuildMatMulOpTestCase(input0_def, input1_def),
BuildMatMulOpQDQTestCase<uint16_t, uint16_t, uint16_t>(input0_def, input1_def, false),
provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance());
}
}
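
As a side note, each input above computes its element count inline via std::accumulate over the shape. A tiny helper such as the hypothetical ElementCount below (not part of the repository's test utilities) captures that product-of-dimensions computation; it is shown only as a readability sketch.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

// Hypothetical helper: number of elements implied by a shape,
// i.e. the product of its dimensions.
static std::size_t ElementCount(const std::vector<int64_t>& shape) {
  return static_cast<std::size_t>(std::accumulate(shape.begin(), shape.end(),
                                                  static_cast<int64_t>(1),
                                                  std::multiplies<int64_t>()));
}

int main() {
  const std::vector<int64_t> shape = {1, 12, 512, 96};
  std::printf("%zu\n", ElementCount(shape));  // prints 589824
  return 0;
}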

#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)

} // namespace test
Additional changed file (file name not rendered)
@@ -6,7 +6,6 @@

set -e
set -x
export PATH=/opt/python/cp312-cp312/bin:$PATH

ls /build
ls /build/deps
@@ -25,7 +24,7 @@ ANDROID_SDK_HOME="/android_home"
ANDROID_NDK_HOME="/ndk_home"
QNN_HOME="/qnn_home"


python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt
# Base command for building the AAR package
COMMAND="python3 $BUILD_SCRIPT --build_dir /build --config $BUILD_CONFIG --android_sdk_path $ANDROID_SDK_HOME --android_ndk_path $ANDROID_NDK_HOME $BUILD_SETTINGS"

Additional changed file (file name not rendered)
@@ -4,14 +4,6 @@ parameters:
type: boolean
default: false

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

stages:
- template: templates/android-binary-size-check-stage.yml
parameters:
Additional changed file (file name not rendered)
@@ -6,14 +6,6 @@ parameters:
type: boolean
default: true

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

stages:

# build binaries for Android
Additional changed file (file name not rendered)
@@ -70,11 +70,6 @@ resources:
type: github
endpoint: ort-examples
name: microsoft/onnxruntime-inference-examples
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

variables:
- template: templates/common-variables.yml
Additional changed file (file name not rendered)
@@ -78,11 +78,7 @@ resources:
type: github
endpoint: ort-examples
name: microsoft/onnxruntime-inference-examples
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7


stages:
# Set ReleaseVersionSuffix
4 changes: 2 additions & 2 deletions tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
@@ -79,15 +79,15 @@ stages:
onnxruntimecpubuildcentos8x64 \
/bin/bash -c '
set -ex; \
python3.12 /onnxruntime_src/tools/ci_build/build.py \
python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator 'Ninja' \
--config Debug \
--skip_submodule_sync \
--build_shared_lib \
--parallel --use_binskim_compliant_compile_flags \
--enable_onnx_tests --enable_address_sanitizer \
--update --build;
python3.12 /onnxruntime_src/tools/ci_build/build.py \
python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator 'Ninja' \
--config Debug \
--skip_submodule_sync \
Remaining changed files are not rendered.
