Merge main and fix conflicts
adrianlizarraga committed Jan 18, 2025
2 parents 75afaa6 + a9bf0be commit c064401
Showing 24 changed files with 415 additions and 849 deletions.
342 changes: 281 additions & 61 deletions onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc

Large diffs are not rendered by default.

103 changes: 0 additions & 103 deletions onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -16,11 +16,6 @@ class SimpleOpBuilder : public BaseOpBuilder {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SimpleOpBuilder);

protected:
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
std::vector<std::string>& input_names,
bool do_op_validation) const override ORT_MUST_USE_RESULT;
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
@@ -47,91 +42,6 @@ class SimpleOpBuilder : public BaseOpBuilder {
static constexpr std::array<std::string_view, 3> gridsample_supported_padding_modes = {"zeros", "border", "reflection"};
};

// Move to qnn_utils if it's re-usable
Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper,
const std::string& convert_input_name,
const std::string& convert_output_name,
Qnn_DataType_t input_qnn_data_type,
Qnn_DataType_t output_qnn_data_type,
int32_t input_offset,
float input_scale,
const std::vector<uint32_t>& output_shape,
bool do_op_validation) {
// Assume input is already handled.
float qmin = 0.0f;
float qmax = 255.0f;
ORT_RETURN_IF_ERROR(qnn::utils::GetQminQmax(input_qnn_data_type, qmin, qmax));
double value_min = qnn::utils::Dequantize(input_offset, input_scale, qmin);
double value_max = qnn::utils::Dequantize(input_offset, input_scale, qmax);
float scale = 0.0f;
int32_t offset = 0;
ORT_RETURN_IF_ERROR(qnn::utils::GetQuantParams(static_cast<float>(value_min),
static_cast<float>(value_max),
output_qnn_data_type,
scale,
offset));

std::vector<uint32_t> output_shape_copy = output_shape;
QnnTensorWrapper convert_output_tensorwrapper(convert_output_name,
QNN_TENSOR_TYPE_NATIVE,
output_qnn_data_type,
QnnQuantParamsWrapper(scale, offset),
std::move(output_shape_copy));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(convert_output_tensorwrapper)), "Failed to add tensor.");

ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(convert_output_name,
QNN_OP_PACKAGE_NAME_QTI_AISW,
"Convert",
{convert_input_name},
{convert_output_name},
{},
do_op_validation),
"Failed to add node.");
return Status::OK();
}
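
For intuition, the conversion above picks uint8 quantization parameters that span the same real-valued range the uint16 input can represent, so the inserted Convert loses only precision, not range. Below is a minimal standalone sketch of that derivation; the DequantizeLevel and Uint16ToUint8Params helpers are hypothetical and use the common value = scale * (q - zero_point) convention, whereas the qnn::utils helpers above may follow QNN's own offset sign convention.

#include <cstdint>
#include <cstdio>
#include <utility>

// Hypothetical helper: real value represented by quantized level q under the
// convention value = scale * (q - zero_point).
static double DequantizeLevel(double q, double scale, double zero_point) {
  return scale * (q - zero_point);
}

// Derive uint8 (0..255) quantization parameters that cover the same real range
// as the given uint16 (0..65535) parameters.
static std::pair<float, int32_t> Uint16ToUint8Params(float in_scale, int32_t in_zero_point) {
  const double value_min = DequantizeLevel(0.0, in_scale, in_zero_point);
  const double value_max = DequantizeLevel(65535.0, in_scale, in_zero_point);
  const float out_scale = static_cast<float>((value_max - value_min) / 255.0);
  // Choose the zero point so that value_min maps to quantized level 0.
  const int32_t out_zero_point = static_cast<int32_t>(-value_min / out_scale + 0.5);
  return {out_scale, out_zero_point};
}

int main() {
  // Example: a uint16 activation with scale 1e-4 and zero point 32768.
  const auto [scale, zero_point] = Uint16ToUint8Params(1e-4f, 32768);
  std::printf("uint8 scale=%g zero_point=%d\n", scale, zero_point);
  return 0;
}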

Status SimpleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
std::vector<std::string>& input_names,
bool do_op_validation) const {
const std::string& op_type = node_unit.OpType();
ORT_RETURN_IF_ERROR(BaseOpBuilder::ProcessInputs(qnn_model_wrapper, node_unit, logger, input_names, do_op_validation));

if (op_type == "MatMul") {
const auto& inputs = node_unit.Inputs();
TensorInfo input0_info = {};
TensorInfo input1_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input1_info));
// Need to insert a Convert op if both inputs are dynamic and are ufixed_16
if (!input0_info.is_initializer && !input1_info.is_initializer &&
input0_info.qnn_data_type == input1_info.qnn_data_type &&
input0_info.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) {
ORT_RETURN_IF_NOT(input1_info.quant_param.IsPerTensor(),
"MatMul's activation inputs only support per-tensor quantization");
const Qnn_QuantizeParams_t& quant_param = input1_info.quant_param.Get();
// insert Convert op after input1
std::string convert_input_name = input_names.back();
input_names.pop_back();
const std::string& matmul_output_name = node_unit.Outputs()[0].node_arg.Name();
std::string convert_output_name = convert_input_name + "_convert_" + matmul_output_name;
ORT_RETURN_IF_ERROR(InsertConvertOp(qnn_model_wrapper,
convert_input_name,
convert_output_name,
input1_info.qnn_data_type,
QNN_DATATYPE_UFIXED_POINT_8,
quant_param.scaleOffsetEncoding.offset,
quant_param.scaleOffsetEncoding.scale,
input1_info.shape,
do_op_validation));
input_names.push_back(convert_output_name);
}
}

return Status::OK();
}

Status SimpleOpBuilder::ExplicitOpCheck(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit) const {
const std::string& op_type = node_unit.OpType();
@@ -373,19 +283,6 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
ORT_RETURN_IF(norm_p_order != 2, "QNN EP only supports LpNormalization with 'p' attribute equal to 2.");
}

if (op_type == "MatMul") {
Qnn_Scalar_t scalar_param = QNN_SCALAR_INIT;
scalar_param.dataType = QNN_DATATYPE_BOOL_8;
scalar_param.bool8Value = 0;
QnnParamWrapper transpose_in0_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN0, scalar_param);
param_tensor_names.push_back(transpose_in0_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(transpose_in0_param));

QnnParamWrapper transpose_in1_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar_param);
param_tensor_names.push_back(transpose_in1_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(transpose_in1_param));
}

if (op_type == "LeakyRelu") {
std::string input_name = "alpha";
ORT_RETURN_IF_ERROR(ProcessAlphaAttributeAsInput(qnn_model_wrapper, node_unit, input_name));
16 changes: 10 additions & 6 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -1093,35 +1093,39 @@ Status QnnBackendManager::TerminateQnnLog() {
}

void QnnBackendManager::ReleaseResources() {
if (!backend_setup_completed_) {
return;
}

auto result = ReleaseContext();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext.";
}

result = ReleaseProfilehandle();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle.";
}

result = ReleaseDevice();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice.";
}

result = ShutdownBackend();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend.";
}

result = TerminateQnnLog();
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog.";
}

if (backend_lib_handle_) {
result = UnloadLib(backend_lib_handle_);
if (Status::OK() != result) {
LOGS_DEFAULT(ERROR) << "Failed to unload backend library: " << result.ErrorMessage();
LOGS_DEFAULT(ERROR) << "Failed to unload backend library.";
}
}

14 changes: 14 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
@@ -74,6 +74,20 @@ Status QnnModelWrapper::MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensor
return Status::OK();
}

Status QnnModelWrapper::MakeTensorWrapper(const TensorInfo& tensor_info,
const std::string& tensor_name,
QnnTensorWrapper& tensor_wrapper) const {
std::vector<uint8_t> unpacked_tensor;
if (tensor_info.is_initializer) {
ORT_RETURN_IF_ERROR(UnpackInitializerData(*tensor_info.initializer_tensor, unpacked_tensor));
}

tensor_wrapper = QnnTensorWrapper(tensor_name, GetTensorType(tensor_name), tensor_info.qnn_data_type,
tensor_info.quant_param.Copy(), std::vector<uint32_t>(tensor_info.shape),
std::move(unpacked_tensor));
return Status::OK();
}

bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) {
// Keep a copy of the tensor name since it will be moved with the wrapper into model_tensors_map_
std::string tensor_name = tensor_wrapper.GetName();
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
@@ -63,6 +63,9 @@ class QnnModelWrapper {

// Make a QnnTensorWrapper from an onnx input or output.
Status MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensorWrapper& tensor_wrapper) const;
Status MakeTensorWrapper(const TensorInfo& tensor_info,
const std::string& tensor_name,
QnnTensorWrapper& tensor_wrapper) const;

// Add to internal tensor wrapper table
bool AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper);
57 changes: 56 additions & 1 deletion onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -290,12 +290,67 @@ TEST_F(QnnHTPBackendTests, MatMulOp_QDQ) {
RunQDQPerChannelMatMulOpTest<uint16_t, Int4x2, uint16_t>({2, 3, 3, 3}, {3, 2}, -1, QDQTolerance(),
ExpectedEPNodeAssignment::All, 18, true);

// // UINT16, per-channel INT8 weight
// UINT16, per-channel INT8 weight
RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3}, {3, 2}, 1, QDQTolerance(),
ExpectedEPNodeAssignment::All, 21, false, false);
RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3, 3}, {3}, -1, QDQTolerance(0.0041f));
}

// Tests MatMul with two uint16 (quantized) inputs that are both dynamic.
// This exercises a workaround in QNN EP that inserts a QNN Convert op before input[1] (converting it from uint16 to uint8).
// The workaround prevents a validation error for this specific MatMul configuration.
// The specific shapes and input ranges (quantization params) come from a customer model.
TEST_F(QnnHTPBackendTests, MatMulOp_QDQ_Regression_uint16_dynamic_inputs) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif

// Test with rank 4 inputs
{
std::vector<int64_t> shape_0 = {1, 12, 512, 96};
TestInputDef<float> input0_def(
{1, 12, 512, 96}, false,
GetFloatDataInRange(-5.087f, 4.992f,
static_cast<size_t>(std::accumulate(shape_0.begin(), shape_0.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));
std::vector<int64_t> shape_1 = {1, 12, 96, 512};
TestInputDef<float> input1_def(
shape_1, false,
GetFloatDataInRange(-6.772f, 7.258f,
static_cast<size_t>(std::accumulate(shape_1.begin(), shape_1.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));

TestQDQModelAccuracy(
BuildMatMulOpTestCase(input0_def, input1_def),
BuildMatMulOpQDQTestCase<uint16_t, uint16_t, uint16_t>(input0_def, input1_def, false),
provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance());
}

// Test with input[1] as rank 1
{
std::vector<int64_t> shape_0 = {1, 12, 512, 96};
TestInputDef<float> input0_def(
{1, 12, 512, 96}, false,
GetFloatDataInRange(-5.087f, 4.992f,
static_cast<size_t>(std::accumulate(shape_0.begin(), shape_0.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));
std::vector<int64_t> shape_1 = {96};
TestInputDef<float> input1_def(
shape_1, false,
GetFloatDataInRange(-6.772f, 7.258f,
static_cast<size_t>(std::accumulate(shape_1.begin(), shape_1.end(), static_cast<int64_t>(1),
std::multiplies<int64_t>()))));

TestQDQModelAccuracy(
BuildMatMulOpTestCase(input0_def, input1_def),
BuildMatMulOpQDQTestCase<uint16_t, uint16_t, uint16_t>(input0_def, input1_def, false),
provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance());
}
}
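
As a side note, each input above computes its element count inline via std::accumulate over the shape. A tiny helper such as the hypothetical ElementCount below (not part of the repository's test utilities) captures that product-of-dimensions computation; it is shown only as a readability sketch.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

// Hypothetical helper: number of elements implied by a shape,
// i.e. the product of its dimensions.
static std::size_t ElementCount(const std::vector<int64_t>& shape) {
  return static_cast<std::size_t>(std::accumulate(shape.begin(), shape.end(),
                                                  static_cast<int64_t>(1),
                                                  std::multiplies<int64_t>()));
}

int main() {
  const std::vector<int64_t> shape = {1, 12, 512, 96};
  std::printf("%zu\n", ElementCount(shape));  // prints 589824
  return 0;
}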

#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)

} // namespace test
Additional changed file (file name not rendered)
@@ -6,7 +6,6 @@

set -e
set -x
export PATH=/opt/python/cp312-cp312/bin:$PATH

ls /build
ls /build/deps
@@ -25,7 +24,7 @@ ANDROID_SDK_HOME="/android_home"
ANDROID_NDK_HOME="/ndk_home"
QNN_HOME="/qnn_home"


python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt
# Base command for building the AAR package
COMMAND="python3 $BUILD_SCRIPT --build_dir /build --config $BUILD_CONFIG --android_sdk_path $ANDROID_SDK_HOME --android_ndk_path $ANDROID_NDK_HOME $BUILD_SETTINGS"

Additional changed file (file name not rendered)
@@ -4,14 +4,6 @@ parameters:
type: boolean
default: false

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

stages:
- template: templates/android-binary-size-check-stage.yml
parameters:
Additional changed file (file name not rendered)
@@ -6,14 +6,6 @@ parameters:
type: boolean
default: true

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

stages:

# build binaries for Android
Additional changed file (file name not rendered)
@@ -70,11 +70,6 @@ resources:
type: github
endpoint: ort-examples
name: microsoft/onnxruntime-inference-examples
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

variables:
- template: templates/common-variables.yml
Additional changed file (file name not rendered)
@@ -78,11 +78,7 @@ resources:
type: github
endpoint: ort-examples
name: microsoft/onnxruntime-inference-examples
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7


stages:
# Set ReleaseVersionSuffix
4 changes: 2 additions & 2 deletions tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
@@ -79,15 +79,15 @@ stages:
onnxruntimecpubuildcentos8x64 \
/bin/bash -c '
set -ex; \
python3.12 /onnxruntime_src/tools/ci_build/build.py \
python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator 'Ninja' \
--config Debug \
--skip_submodule_sync \
--build_shared_lib \
--parallel --use_binskim_compliant_compile_flags \
--enable_onnx_tests --enable_address_sanitizer \
--update --build;
python3.12 /onnxruntime_src/tools/ci_build/build.py \
python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator 'Ninja' \
--config Debug \
--skip_submodule_sync \
Remaining changed files are not rendered.
