From db8e10b0b9055fa5eacdf8ea07818226ac9fe6a4 Mon Sep 17 00:00:00 2001
From: Edward Chen <18449977+edgchen1@users.noreply.github.com>
Date: Fri, 17 Jan 2025 14:35:25 -0800
Subject: [PATCH 1/3] Revert "[QNN EP] Clean up correctly from a partial setup
 (#23320)" (#23420)

### Description

This reverts commit 5d215ff810ceb2d6c0726befd61ba7b7f21516c9.

### Motivation and Context

The reverted change causes a packaging pipeline to fail due to a crash in one of the E2E Android tests. Reverting it first to fix the pipeline. We should come up with an alternative way to do the necessary cleanup properly.

---
 .../providers/qnn/builder/qnn_backend_manager.cc | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
index 69eabe457a14a..e91fda32510dd 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -1097,35 +1097,39 @@ Status QnnBackendManager::TerminateQnnLog() {
 }
 
 void QnnBackendManager::ReleaseResources() {
+  if (!backend_setup_completed_) {
+    return;
+  }
+
   auto result = ReleaseContext();
   if (Status::OK() != result) {
-    LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext: " << result.ErrorMessage();
+    LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext.";
   }
 
   result = ReleaseProfilehandle();
   if (Status::OK() != result) {
-    LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle: " << result.ErrorMessage();
+    LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle.";
   }
 
   result = ReleaseDevice();
   if (Status::OK() != result) {
-    LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice: " << result.ErrorMessage();
+    LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice.";
   }
 
   result = ShutdownBackend();
   if (Status::OK() != result) {
-    LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend: " << result.ErrorMessage();
+    LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend.";
   }
 
   result = TerminateQnnLog();
   if (Status::OK() != result) {
-    LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog: " << result.ErrorMessage();
+    LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog.";
   }
 
   if (backend_lib_handle_) {
     result = UnloadLib(backend_lib_handle_);
     if (Status::OK() != result) {
-      LOGS_DEFAULT(ERROR) << "Failed to unload backend library: " << result.ErrorMessage();
+      LOGS_DEFAULT(ERROR) << "Failed to unload backend library.";
    }
  }

From d461ca9dcd27d5fb8d07c4227d85a455b060de75 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Fri, 17 Jan 2025 15:29:17 -0800
Subject: [PATCH 2/3] Update onnxruntime binary size checks ci pipeline's
 docker image (#23405)

1. Update the onnxruntime binary size checks CI pipeline's Docker image. Use a different image that is not manylinux-based; the new one is smaller.
2. Add flatbuffers to tools/ci_build/requirements/pybind/requirements.txt.
3. Delete tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml. The pipeline was for generating packages for Olive, but it went unused, and its content largely duplicated our official Python packaging pipeline.
4. Many YAML files reference the pypa/manylinux git repo but do not use it. This PR removes those references.
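For context, the recurring pattern in the pipeline edits below: rather than putting a manylinux interpreter such as /opt/python/cp310-cp310/bin on PATH, each job now installs the pinned Python dependencies inside the (smaller, non-manylinux) image and invokes build.py with the image's default python3. A minimal sketch of that invocation shape; the mounted directories and the Debug config here are illustrative, not any specific pipeline step:

```bash
# Sketch: run a build step inside the non-manylinux image, installing the pinned
# Python dependencies first instead of relying on a manylinux /opt/python interpreter.
docker run --rm \
  --volume "$BUILD_SOURCESDIRECTORY":/onnxruntime_src \
  --volume "$BUILD_BINARIESDIRECTORY":/build \
  onnxruntimecpubuildcentos8x64_packaging \
  bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Debug"
```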
--- .../android/build_aar_and_copy_artifacts.sh | 3 +- .../binary-size-checks-pipeline.yml | 8 - .../build-perf-test-binaries-pipeline.yml | 8 - .../c-api-noopenmp-packaging-pipelines.yml | 5 - .../cuda-packaging-pipeline.yml | 6 +- .../azure-pipelines/linux-ci-pipeline.yml | 4 +- .../linux-cpu-minimal-build-ci-pipeline.yml | 50 +- .../mac-react-native-ci-pipeline.yml | 8 - .../py-package-build-pipeline.yml | 62 -- .../android-binary-size-check-stage.yml | 23 +- .../templates/android-java-api-aar.yml | 9 +- .../templates/c-api-linux-cpu.yml | 2 +- .../py-packaging-selectable-stage.yml | 529 ------------------ .../github/linux/build_cuda_c_api_package.sh | 2 +- .../linux/build_tensorrt_c_api_package.sh | 2 +- .../build_full_ort_and_create_ort_files.sh | 3 +- .../build_minimal_ort_and_run_tests.sh | 4 +- .../requirements/pybind/requirements.txt | 1 + 18 files changed, 51 insertions(+), 678 deletions(-) delete mode 100644 tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml delete mode 100644 tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml diff --git a/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh b/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh index 29c52404dc7e3..001fa2dc188a4 100755 --- a/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh +++ b/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh @@ -6,7 +6,6 @@ set -e set -x -export PATH=/opt/python/cp312-cp312/bin:$PATH ls /build ls /build/deps @@ -25,7 +24,7 @@ ANDROID_SDK_HOME="/android_home" ANDROID_NDK_HOME="/ndk_home" QNN_HOME="/qnn_home" - +python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt # Base command for building the AAR package COMMAND="python3 $BUILD_SCRIPT --build_dir /build --config $BUILD_CONFIG --android_sdk_path $ANDROID_SDK_HOME --android_ndk_path $ANDROID_NDK_HOME $BUILD_SETTINGS" diff --git a/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml b/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml index 74866cfd59b52..ca7ef2e49cdf6 100644 --- a/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml @@ -4,14 +4,6 @@ parameters: type: boolean default: false -resources: - repositories: - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - stages: - template: templates/android-binary-size-check-stage.yml parameters: diff --git a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml index 5adaa686f6c0f..0ce4227c9ef9f 100644 --- a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml @@ -6,14 +6,6 @@ parameters: type: boolean default: true -resources: - repositories: - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - stages: # build binaries for Android diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index cbc3aa705b4f9..b5209ad3ab9d8 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ 
b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -70,11 +70,6 @@ resources: type: github endpoint: ort-examples name: microsoft/onnxruntime-inference-examples - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 variables: - template: templates/common-variables.yml diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index bc33aba57ec93..b24310ac0c3e0 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -78,11 +78,7 @@ resources: type: github endpoint: ort-examples name: microsoft/onnxruntime-inference-examples - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 + stages: # Set ReleaseVersionSuffix diff --git a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml index 0c10d404931e4..dc57fd41fa5f9 100644 --- a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml @@ -79,7 +79,7 @@ stages: onnxruntimecpubuildcentos8x64 \ /bin/bash -c ' set -ex; \ - python3.12 /onnxruntime_src/tools/ci_build/build.py \ + python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build --cmake_generator 'Ninja' \ --config Debug \ --skip_submodule_sync \ @@ -87,7 +87,7 @@ stages: --parallel --use_binskim_compliant_compile_flags \ --enable_onnx_tests --enable_address_sanitizer \ --update --build; - python3.12 /onnxruntime_src/tools/ci_build/build.py \ + python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build --cmake_generator 'Ninja' \ --config Debug \ --skip_submodule_sync \ diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml index e74f1968e3be3..c323e51035c23 100644 --- a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml @@ -67,10 +67,10 @@ jobs: - template: templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu - Context: tools/ci_build/github/linux/docker + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecpubuild + Repository: onnxruntimecpubuildcentos8x64_packaging - task: CmdLine@2 displayName: Create test data directory @@ -104,7 +104,7 @@ jobs: -e BUILD_BUILDNUMBER \ -e CCACHE_DIR=/cache \ -e ORT_BUILD_WITH_CACHE=1 \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash -c " set -e -x; /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh /build/1; \ @@ -124,8 +124,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && 
python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/2 --cmake_generator Ninja \ --config Debug \ --skip_submodule_sync \ @@ -134,7 +134,7 @@ jobs: --skip_tests \ --minimal_build \ --disable_exceptions \ - --enable_training_ops + --enable_training_ops" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -148,7 +148,7 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh \ --build-directory /build/3a \ --reduced-ops-config /home/onnxruntimedev/.test_data/required_ops.ort_models.config \ @@ -166,7 +166,7 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh \ --build-directory /build/3b \ --reduced-ops-config /home/onnxruntimedev/.test_data/required_ops_and_types.ort_models.config \ @@ -188,7 +188,7 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh \ --build-directory /build/4 \ --reduced-ops-config /home/onnxruntimedev/.test_data/globally_allowed_types.config \ @@ -206,14 +206,14 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/5 --cmake_generator Ninja \ --config Debug \ --skip_submodule_sync \ --build_shared_lib --use_binskim_compliant_compile_flags \ --parallel \ - --minimal_build extended + --minimal_build extended" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -227,8 +227,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/6a \ --cmake_generator Ninja \ --config MinSizeRel \ @@ -240,7 +240,7 @@ jobs: --disable_ml_ops \ --disable_types sparsetensor float8 optional \ --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config \ - --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -254,8 +254,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/6b \ --cmake_generator Ninja \ --config MinSizeRel \ 
@@ -269,7 +269,7 @@ jobs: --enable_reduced_operator_type_support \ --disable_types sparsetensor optional float8 \ --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config \ - --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -283,8 +283,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/6c \ --cmake_generator Ninja \ --config MinSizeRel \ @@ -298,7 +298,7 @@ jobs: --enable_reduced_operator_type_support \ --disable_types sparsetensor optional float8 \ --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config \ - --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -313,8 +313,8 @@ jobs: --volume $NDK_HOME:/ndk_home \ -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/7 \ --cmake_generator Ninja \ --config MinSizeRel \ @@ -330,7 +330,7 @@ jobs: --build_shared_lib \ --disable_ml_ops \ --disable_exceptions \ - --skip_tests --path_to_protoc_exe /usr/bin/protoc + --skip_tests" workingDirectory: $(Build.SourcesDirectory) - template: templates/explicitly-defined-final-tasks.yml diff --git a/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml index ba9610ffee793..e72f088cfeb55 100644 --- a/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml @@ -39,14 +39,6 @@ parameters: - 'custom' default: 'nightly (@dev)' -resources: - repositories: - - repository: manylinux # The name used to reference this repository in the checkout step - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - variables: skipComponentGovernanceDetection: true ${{ if eq(parameters.NpmPublish, 'nightly (@dev)') }}: diff --git a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml deleted file mode 100644 index afa0ad6f4cbc7..0000000000000 --- a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml +++ /dev/null @@ -1,62 +0,0 @@ -parameters: -- name: enable_linux_cpu - displayName: 'Whether Linux CPU package is built.' - type: boolean - default: true - -- name: enable_linux_gpu - displayName: 'Whether Linux GPU package is built.' - type: boolean - default: true - -- name: enable_windows_cpu - displayName: 'Whether Windows CPU package is built.' - type: boolean - default: true - -- name: enable_windows_gpu - displayName: 'Whether Windows GPU package is built.' 
- type: boolean - default: true - -- name: python_version - displayName: 'Python version used for build' - type: object - default: ["'3.8'"] - -- name: cpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for CPU package.' - type: string - default: '--use_openvino CPU' - -- name: gpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for GPU package.' - type: string - default: ' ' - -- name: ubuntu_version_linux_cpu - displayName: 'Ubuntu Version for Linux CPU package.' - type: string - default: '20.04' - -trigger: none - -resources: - repositories: - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - -stages: -- template: templates/py-packaging-selectable-stage.yml - parameters: - enable_linux_gpu: ${{ parameters.enable_linux_gpu }} - enable_linux_cpu: ${{ parameters.enable_linux_cpu }} - enable_windows_cpu: ${{ parameters.enable_windows_cpu }} - enable_windows_gpu: ${{ parameters.enable_windows_gpu }} - python_version: ${{ parameters.python_version }} - cpu_build_py_parameters: ${{ parameters.cpu_build_py_parameters }} - gpu_build_py_parameters: ${{ parameters.gpu_build_py_parameters }} - ubuntu_version_linux_cpu: ${{ parameters.ubuntu_version_linux_cpu }} diff --git a/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml b/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml index 4d9606d82ced2..3cccd3aee15cd 100644 --- a/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml @@ -38,13 +38,12 @@ stages: submodules: none - template: use-android-ndk.yml - #TODO: use a different docker file since this job doesn't need to rely on manylinux - template: get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu - Context: tools/ci_build/github/linux/docker + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecpubuild + Repository: onnxruntimecpubuildcentos8x64_packaging - task: PythonScript@0 displayName: 'Set variables from config file "${{ parameters.BuildConfigFile }}"' @@ -83,6 +82,7 @@ stages: --volume $(Build.BinariesDirectory):/build \ --volume $ANDROID_HOME:/android_home \ --volume $NDK_HOME:/ndk_home \ + -w /onnxruntime_src \ -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ @@ -90,11 +90,10 @@ stages: -e BUILD_ID=$(Build.BuildId) \ -e BUILD_REASON=$(Build.Reason) \ -e BUILD_BRANCH=$(Build.SourceBranch) \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r tools/ci_build/requirements/pybind/requirements.txt && python3 tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ --build_dir /build/1a \ - ${BINARY_SIZE_THRESHOLD_ARGS} \ - "/onnxruntime_src/${{ parameters.BuildConfigFile }}" + ${BINARY_SIZE_THRESHOLD_ARGS} ${{ parameters.BuildConfigFile }} " workingDirectory: $(Build.SourcesDirectory) - task: AzureCLI@2 @@ -139,6 +138,7 @@ stages: --volume $(Build.BinariesDirectory):/build \ --volume 
$ANDROID_HOME:/android_home \ --volume $NDK_HOME:/ndk_home \ + -w /onnxruntime_src \ -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ @@ -146,11 +146,10 @@ stages: -e BUILD_ID=$(Build.BuildId) \ -e BUILD_REASON=$(Build.Reason) \ -e BUILD_BRANCH=$(Build.SourceBranch) \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r tools/ci_build/requirements/pybind/requirements.txt && python3 tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ --build_dir /build/1b \ - --with_debug_info \ - "/onnxruntime_src/${{ parameters.BuildConfigFile }}" + --with_debug_info ${{ parameters.BuildConfigFile }}" workingDirectory: $(Build.SourcesDirectory) - task: PublishPipelineArtifact@1 diff --git a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml index c38736edd58f1..707e8eac0249f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml +++ b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml @@ -76,13 +76,12 @@ jobs: mkdir -p $(artifacts_directory) workingDirectory: $(Build.BinariesDirectory) - #TODO: use a different docker file since this job doesn't need to rely on manylinux - template: get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu - Context: tools/ci_build/github/linux/docker + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecpubuild + Repository: onnxruntimecpubuildcentos8x64_packaging - template: set-version-number-variables-step.yml @@ -127,7 +126,7 @@ jobs: -e PUBLISH_EXECUTABLES=${{parameters.publish_executables}} \ -e PACKAGE_NAME=${{parameters.packageName}} \ -e RELEASE_VERSION_SUFFIX=${{parameters.ReleaseVersionSuffix}} \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh $USE_QNN workingDirectory: $(Build.SourcesDirectory) diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml index 30b9e93594b55..d4bc54273a764 100644 --- a/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml @@ -65,7 +65,7 @@ jobs: set -e -x mkdir -p $HOME/.onnx docker run --rm --volume /data/onnx:/data/onnx:ro --volume $(Build.SourcesDirectory):/onnxruntime_src --volume $(Build.BinariesDirectory):/build \ - --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging /bin/bash -c "python3.12 \ + --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging /bin/bash -c "python3 \ /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release \ --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib ${{ parameters.AdditionalBuildFlags }} && cd /build/Release && make install 
DESTDIR=/build/installed" workingDirectory: $(Build.SourcesDirectory) diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml deleted file mode 100644 index b4908fd1ae45a..0000000000000 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml +++ /dev/null @@ -1,529 +0,0 @@ -parameters: -- name: enable_linux_cpu - displayName: 'Whether Linux CPU package is built.' - type: boolean - default: true - -- name: enable_windows_cpu - displayName: 'Whether Windows CPU package is built.' - type: boolean - default: true - -- name: enable_linux_gpu - displayName: 'Whether Linux GPU package is built.' - type: boolean - default: true - -- name: enable_windows_gpu - displayName: 'Whether Windows GPU package is built.' - type: boolean - default: true - -- name: python_version - displayName: 'Python version used for build' - type: object - default: ["'3.8'"] - -- name: cpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for CPU package.' - type: string - default: '--use_openvino CPU' - -- name: gpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for GPU package.' - type: string - default: ' ' - -- name: ubuntu_version_linux_cpu - displayName: 'Ubuntu Version for Linux CPU package.' - type: string - default: '20.04' - -stages: -- stage: Python_Packaging - - jobs: - - ${{ if eq(parameters.enable_linux_cpu, true) }}: - - job: Linux_CPU_py_Wheels - timeoutInMinutes: 90 - workspace: - clean: all - pool: onnxruntime-Ubuntu2204-AMD-CPU - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - variables: - UbuntuVersion: ${{ parameters.ubuntu_version_linux_cpu }} - steps: - - checkout: self - clean: true - submodules: recursive - - - template: get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino - Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg PYTHON_VERSION=$(PythonVersion) --build-arg UBUNTU_VERSION=$(UbuntuVersion)" - Repository: onnxruntimeubuntupython$(PythonVersion)cpubuild - - - task: CmdLine@2 - displayName: 'Build Python Wheel' - inputs: - script: | - mkdir -p $HOME/.onnx - docker run --rm \ - --volume /data/onnx:/data/onnx:ro \ - --volume $(Build.SourcesDirectory):/onnxruntime_src \ - --volume $(Build.BinariesDirectory):/build \ - --volume /data/models:/build/models:ro \ - --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \ - -e BUILD_BUILDNUMBER \ - onnxruntimeubuntupython$(PythonVersion)cpubuild \ - python3 /onnxruntime_src/tools/ci_build/build.py \ - --build_dir /build \ - --config Release --update --build \ - --skip_submodule_sync \ - --parallel \ - --enable_lto \ - --build_wheel \ - --enable_onnx_tests \ - --test \ - --ctest_path '' \ - ${{ parameters.cpu_build_py_parameters }} - workingDirectory: $(Build.SourcesDirectory) - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)/Release/dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel and documentation' - inputs: - ArtifactName: onnxruntime - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - - 
template: clean-agent-build-directory-step.yml - - - ${{ if eq(parameters.enable_windows_cpu, true) }}: - - job: Windows_CPU_py_Wheels - pool: 'onnxruntime-cpu-openvino-winbuild' - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - variables: - OnnxRuntimeBuildDirectory: '$(Build.BinariesDirectory)' - EnvSetupScript: setup_env.bat - setVcvars: true - BuildConfig: 'RelWithDebInfo' - timeoutInMinutes: 120 - workspace: - clean: all - - steps: - - checkout: self - clean: true - submodules: recursive - - - task: UsePythonVersion@0 - inputs: - versionSpec: $(PythonVersion) - addToPath: true - - - task: BatchScript@1 - displayName: 'setup env' - inputs: - filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\$(EnvSetupScript)' - modifyEnvironment: true - workingFolder: '$(Build.BinariesDirectory)' - - - task: BatchScript@1 - displayName: 'setup OpenVino env' - inputs: - filename: 'C:\Program Files\Intel\openvino_2021.4.752\bin\setupvars.bat' - modifyEnvironment: true - - - task: PowerShell@2 - displayName: 'Install ONNX' - inputs: - filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' - workingDirectory: '$(Build.BinariesDirectory)' - arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\$(BuildConfig)\installed -build_config $(BuildConfig) - - - task: PythonScript@0 - displayName: 'Generate cmake config' - inputs: - scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: > - --config $(BuildConfig) - --enable_lto - --build_dir $(Build.BinariesDirectory) - --skip_submodule_sync - --cmake_generator "Visual Studio 17 2022" - --enable_pybind - --enable_onnx_tests - ${{ parameters.cpu_build_py_parameters }} - --parallel --update - workingDirectory: '$(Build.BinariesDirectory)' - - - task: VSBuild@1 - displayName: 'Build' - inputs: - solution: '$(Build.BinariesDirectory)\$(BuildConfig)\onnxruntime.sln' - platform: x64 - configuration: $(BuildConfig) - msbuildArchitecture: x64 - maximumCpuCount: true - logProjectEvents: true - workingFolder: '$(Build.BinariesDirectory)\$(BuildConfig)' - createLogFile: true - - # Esrp signing - - template: win-esrp-dll.yml - parameters: - FolderPath: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)\onnxruntime\capi' - DisplayName: 'ESRP - Sign Native dlls' - DoEsrp: true - Pattern: '*.pyd,*.dll' - - - task: PythonScript@0 - displayName: 'Build wheel' - inputs: - scriptPath: '$(Build.SourcesDirectory)\setup.py' - arguments: 'bdist_wheel' - workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)' - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)\dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel' - inputs: - ArtifactName: onnxruntime - - - script: | - 7z x *.whl - workingDirectory: '$(Build.ArtifactStagingDirectory)' - displayName: 'unzip the package' - - task: CredScan@3 - displayName: 'Run CredScan' - inputs: - debugMode: false - continueOnError: true - - - task: BinSkim@4 - displayName: 'Run BinSkim' - inputs: - AnalyzeTargetGlob: '+:file|$(Build.ArtifactStagingDirectory)\**\*.dll;-:file|$(Build.ArtifactStagingDirectory)\**\DirectML.dll' - continueOnError: true - - - task: DeleteFiles@1 - displayName: 
'Delete files from $(Build.BinariesDirectory)\$(BuildConfig)' - condition: and (succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - SourceFolder: '$(Build.BinariesDirectory)\$(BuildConfig)' - Contents: | - **/*.obj - **/*.pdb - **/*.dll - - - powershell: | - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --force-reinstall --upgrade $_.fullname tabulate} - python -m pip install protobuf==3.18.1 - Remove-Item -Recurse -Force onnxruntime - python onnx_backend_test_series.py - workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)' - displayName: 'Run Python Tests' - - #Skip it for 32 bits x86 build. Currently the scan tool has a bug: it doesn't allow me use 64 bits link.exe - #in 32 bits Win32 build. I tried all the settings but they all don't work. - - task: SDLNativeRules@3 - displayName: 'Run the PREfast SDL Native Rules for MSBuild' - condition: and (succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - msBuildArchitecture: amd64 - setupCommandlines: 'python $(Build.SourcesDirectory)\tools\ci_build\build.py --config Debug --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "Visual Studio 17 2022" --enable_pybind --enable_onnx_tests --parallel --update --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON' - msBuildCommandline: '"C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.BinariesDirectory)\Debug\onnxruntime.sln" /p:platform="x64" /p:configuration=Debug /p:VisualStudioVersion="17.0" /m /p:PreferredToolArchitecture=x64' - excludedPaths: '$(Build.BinariesDirectory)#$(Build.SourcesDirectory)\cmake#C:\program files (x86)' - - - task: TSAUpload@2 - displayName: 'TSA upload' - condition: and(and (succeeded(), eq(variables['PythonVersion'], '3.8')), eq(variables['Build.SourceBranch'], 'refs/heads/main')) - inputs: - GdnPublishTsaOnboard: false - GdnPublishTsaConfigFile: '$(Build.sourcesDirectory)\.gdn\.gdntsa' - continueOnError: true - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - - - ${{ if eq(parameters.enable_linux_gpu, true) }}: - - job: Linux_py_GPU_Wheels - timeoutInMinutes: 300 - workspace: - clean: all - pool: Onnxruntime-Linux-GPU - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - steps: - - checkout: self - clean: true - submodules: recursive - - - template: set-python-manylinux-variables-step.yml - - - template: get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_8_tensorrt8_6 - Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-11/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-11/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-11/root/usr/lib64:/opt/rh/devtoolset-11/root/usr/lib:/opt/rh/devtoolset-11/root/usr/lib64/dyninst:/opt/rh/devtoolset-11/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecuda118xtrt86build - - - task: CmdLine@2 - displayName: 'Build Python Wheel' - inputs: - script: | - mkdir -p $HOME/.onnx - docker run --gpus all -e CC=/opt/rh/devtoolset-11/root/usr/bin/cc -e 
CXX=/opt/rh/devtoolset-11/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \ - --volume /data/onnx:/data/onnx:ro \ - --volume $(Build.SourcesDirectory):/onnxruntime_src \ - --volume $(Build.BinariesDirectory):/build \ - --volume /data/models:/build/models:ro \ - --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \ - -e BUILD_BUILDNUMBER \ - onnxruntimecuda118xtrt86build \ - $(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \ - --build_dir /build --cmake_generator Ninja \ - --config Release --update --build \ - --skip_submodule_sync \ - --parallel \ - --build_wheel \ - --enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \ - ${{ parameters.gpu_build_py_parameters }} \ - --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=75;80;90' - workingDirectory: $(Build.SourcesDirectory) - - - task: CmdLine@2 - displayName: 'Running tests' - condition: and(succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - script: | - set -e -x - rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11 - sudo rm -f /build /onnxruntime_src - sudo ln -s $(Build.SourcesDirectory) /onnxruntime_src - python3 -m pip uninstall -y onnxruntime onnxruntime-gpu onnxruntime-training onnxruntime-directml -qq - cp $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt $(Build.BinariesDirectory)/requirements.txt - # Test ORT with the latest ONNX release. 
- sed -i "s/git+http:\/\/github\.com\/onnx\/onnx.*/onnx/" $(Build.BinariesDirectory)/requirements.txt - python3 -m pip install -r $(Build.BinariesDirectory)/requirements.txt - python3 -m pip install $(Build.BinariesDirectory)/Release/dist/*.whl - cd $(Build.BinariesDirectory)/Release - ls $(Build.BinariesDirectory)/models - rmdir $(Build.BinariesDirectory)/models - ln -sf /data/models $(Build.BinariesDirectory) - python3 /onnxruntime_src/tools/ci_build/build.py \ - --build_dir $(Build.BinariesDirectory) --cmake_generator Ninja \ - --config Release --test \ - --skip_submodule_sync \ - --parallel \ - --build_wheel \ - --enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \ - ${{ parameters.gpu_build_py_parameters }} --ctest_path '' \ - --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=75;80;90' - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)/Release/dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel' - inputs: - ArtifactName: onnxruntime_gpu - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - - template: clean-agent-build-directory-step.yml - - - ${{ if eq(parameters.enable_windows_gpu, true) }}: - - job: Windows_py_GPU_Wheels - workspace: - clean: all - pool: 'onnxruntime-Win2022-GPU-A10' - timeoutInMinutes: 300 - variables: - - template: common-variables.yml - CUDA_VERSION: '11.8' - buildArch: x64 - EpBuildFlags: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }}" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90" - EnvSetupScript: setup_env_gpu.bat - EP_NAME: gpu - VSGenerator: 'Visual Studio 17 2022' - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - steps: - - checkout: self - clean: true - submodules: recursive - - - task: UsePythonVersion@0 - inputs: - versionSpec: $(PythonVersion) - addToPath: true - architecture: 'x64' - - - task: BatchScript@1 - displayName: 'setup env' - inputs: - filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\$(EnvSetupScript)' - modifyEnvironment: true - workingFolder: '$(Build.BinariesDirectory)' - - - task: PowerShell@2 - displayName: 'Install ONNX' - inputs: - filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' - workingDirectory: '$(Build.BinariesDirectory)' - arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\RelWithDebInfo\installed -build_config RelWithDebInfo - - - task: PythonScript@0 - displayName: 'Generate cmake config' - inputs: - scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: > - --config RelWithDebInfo - --build_dir $(Build.BinariesDirectory) - --skip_submodule_sync - --cmake_generator "$(VSGenerator)" - --enable_pybind - --enable_onnx_tests - ${{ parameters.gpu_build_py_parameters }} - --parallel --update - $(EpBuildFlags) - workingDirectory: '$(Build.BinariesDirectory)' - - - task: VSBuild@1 - displayName: 'Build' - inputs: - solution: 
'$(Build.BinariesDirectory)\RelWithDebInfo\onnxruntime.sln' - platform: x64 - configuration: RelWithDebInfo - msbuildArchitecture: $(buildArch) - maximumCpuCount: true - logProjectEvents: true - workingFolder: '$(Build.BinariesDirectory)\RelWithDebInfo' - createLogFile: true - - # Esrp signing - - template: win-esrp-dll.yml - parameters: - FolderPath: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\onnxruntime\capi' - DisplayName: 'ESRP - Sign Native dlls' - DoEsrp: true - Pattern: '*.pyd,*.dll' - - - task: PythonScript@0 - displayName: 'Build wheel' - inputs: - scriptPath: '$(Build.SourcesDirectory)\setup.py' - arguments: 'bdist_wheel ${{ parameters.gpu_build_py_parameters }} --wheel_name_suffix=$(EP_NAME)' - workingDirectory: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel' - inputs: - ArtifactName: onnxruntime_gpu - - - script: | - 7z x *.whl - workingDirectory: '$(Build.ArtifactStagingDirectory)' - displayName: 'unzip the package' - - - task: CredScan@3 - displayName: 'Run CredScan' - inputs: - debugMode: false - continueOnError: true - - - task: BinSkim@4 - displayName: 'Run BinSkim' - inputs: - AnalyzeTargetGlob: '+:file|$(Build.ArtifactStagingDirectory)\**\*.dll;-:file|$(Build.ArtifactStagingDirectory)\**\DirectML.dll' - - - task: DeleteFiles@1 - displayName: 'Delete files from $(Build.BinariesDirectory)\RelWithDebInfo' - condition: and (succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo' - Contents: | - **/*.obj - **/*.pdb - **/*.dll - - - powershell: | - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} - Remove-Item -Recurse -Force onnxruntime - python onnx_backend_test_series.py - workingDirectory: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' - displayName: 'Run Python Tests' - - #Manually set msBuildCommandline so that we can also set CAExcludePath - - task: SDLNativeRules@3 - displayName: 'Run the PREfast SDL Native Rules for MSBuild' - condition: and (succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - msBuildArchitecture: amd64 - setupCommandlines: 'python $(Build.SourcesDirectory)\tools\ci_build\build.py --config RelWithDebInfo --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "$(VSGenerator)" --enable_pybind --enable_onnx_tests ${{ parameters.gpu_build_py_parameters }} --parallel $(EpBuildFlags) --update --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON' - ${{if eq(variables.VSGenerator, 'Visual Studio 16 2019')}}: - msBuildCommandline: '"C:\Program Files\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.BinariesDirectory)\RelWithDebInfo\onnxruntime.sln" /p:platform=x64 /p:configuration="RelWithDebInfo" /p:VisualStudioVersion="16.0" /m /p:PreferredToolArchitecture=x64' - ${{else}}: - msBuildCommandline: '"C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.BinariesDirectory)\RelWithDebInfo\onnxruntime.sln" /p:platform=x64 
/p:configuration="RelWithDebInfo" /p:VisualStudioVersion="17.0" /m /p:PreferredToolArchitecture=x64' - excludedPaths: '$(Build.BinariesDirectory)#$(Build.SourcesDirectory)\cmake#C:\program files (x86)' - - - task: TSAUpload@2 - displayName: 'TSA upload' - condition: and(and (succeeded(), eq(variables['PythonVersion'], '3.8')), eq(variables['Build.SourceBranch'], 'refs/heads/main')) - inputs: - GdnPublishTsaOnboard: false - GdnPublishTsaConfigFile: '$(Build.sourcesDirectory)\.gdn\.gdntsa' - continueOnError: true - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - diff --git a/tools/ci_build/github/linux/build_cuda_c_api_package.sh b/tools/ci_build/github/linux/build_cuda_c_api_package.sh index 8a8d02936e773..129127c9497db 100755 --- a/tools/ci_build/github/linux/build_cuda_c_api_package.sh +++ b/tools/ci_build/github/linux/build_cuda_c_api_package.sh @@ -2,4 +2,4 @@ set -e -x docker run --rm --volume \ $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}build \ -/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed" +/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed" diff --git a/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh b/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh index ce6f0929c0906..58ea3054afdda 100755 --- a/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh +++ b/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh @@ -3,4 +3,4 @@ set -e -x mkdir -p $HOME/.onnx docker run --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \ --volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \ -/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed" +/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION 
--cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed"
diff --git a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh
index aef9793f696b6..614eb3a631e13 100755
--- a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh
+++ b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh
@@ -5,10 +5,10 @@
 set -e
 set -x
 
-export PATH=/opt/python/cp310-cp310/bin:$PATH
 
 BUILD_DIR=${1:?"usage: $0 "}
 
+python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/python/requirements.txt
 # Validate the operator kernel registrations, as the ORT model uses hashes of the kernel registration details
 # to find kernels. If the hashes from the registration details are incorrect we will produce a model that will break
 # when the registration is fixed in the future.
@@ -26,7 +26,6 @@ python3 /onnxruntime_src/tools/ci_build/build.py \
     --build_wheel \
     --skip_tests \
     --enable_training_ops \
-    --enable_pybind --cmake_extra_defines PYTHON_INCLUDE_DIR=/opt/python/cp310-cp310/include/python3.10 PYTHON_LIBRARY=/usr/lib64/librt.so \
     --use_nnapi \
     --use_coreml
diff --git a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh
index c857d3f1036bc..f5184b20d0a6c 100755
--- a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh
+++ b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh
@@ -7,7 +7,7 @@
 set -e
 set -x
 
-export PATH=/opt/python/cp310-cp310/bin:$PATH
+
 USAGE_TEXT="Usage:
   -b|--build-directory Specifies the build directory. Required.
@@ -65,7 +65,7 @@ if [[ -z "${BUILD_DIR}" || -z "${REDUCED_OPS_CONFIG_FILE}" ]]; then
     echo "$USAGE_TEXT"
     exit 1
 fi
-
+python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/python/requirements.txt
 # Perform a minimal build with required ops and run ORT minimal build UTs
 python3 /onnxruntime_src/tools/ci_build/build.py \
     --build_dir ${BUILD_DIR} --cmake_generator Ninja \
diff --git a/tools/ci_build/requirements/pybind/requirements.txt b/tools/ci_build/requirements/pybind/requirements.txt
index 8f00a25627c21..0a8caecd11e4e 100644
--- a/tools/ci_build/requirements/pybind/requirements.txt
+++ b/tools/ci_build/requirements/pybind/requirements.txt
@@ -6,3 +6,4 @@ sympy>=1.10
 packaging
 cerberus
 psutil
+flatbuffers

From a9bf0bedd83e2f47df2f225bf9359b82e3c9dd38 Mon Sep 17 00:00:00 2001
From: Adrian Lizarraga
Date: Fri, 17 Jan 2025 15:45:49 -0800
Subject: [PATCH 3/3] [QNN EP] Fix regression for MatMul with two
 quantized/dynamic uint16 inputs (#23419)

### Description

- Fixes a regression for MatMul with two quantized/dynamic uint16 inputs. We need to convert input[1] to uint8 to pass QNN validation.
- Splits the translation of `ONNX MatMul -> QNN MatMul` and `ONNX MatMul -> QNN FullyConnected` into separate functions to make the code more readable.

### Motivation and Context

The handling of MatMul was updated in https://github.com/microsoft/onnxruntime/pull/22639.
The logic to handle MatMul with two non-const uint16 inputs was not ported from [simple_op_builder.cc](https://github.com/microsoft/onnxruntime/blob/c64fa18834f0651b7d62507a34d802874b099c29/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc#L107) to the new [matmul_op_builder.cc](https://github.com/microsoft/onnxruntime/blob/c64fa18834f0651b7d62507a34d802874b099c29/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc#L57).

---
 .../builder/opbuilder/matmul_op_builder.cc    | 342 ++++++++++++++----
 .../builder/opbuilder/simple_op_builder.cc    | 103 ------
 .../qnn/builder/qnn_model_wrapper.cc          |  14 +
 .../providers/qnn/builder/qnn_model_wrapper.h |   3 +
 .../test/providers/qnn/matmul_test.cpp        |  59 ++-
 5 files changed, 355 insertions(+), 166 deletions(-)

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc
index bac08f1993f47..850fd2875818e 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc
@@ -13,11 +13,12 @@ namespace onnxruntime {
 namespace qnn {
 
 /**
- * ONNX's MatMul supports 1D tensor as input on both size, but neither QNN's MatMul nor FullyConnected supports it.
- * So we need to add Reshape Ops if necessary.
+ * An ONNX MatMul can be translated to either a QNN MatMul or a QNN FullyConnected.
+ * ONNX's MatMul supports inputs of rank 1, but neither QNN's MatMul nor FullyConnected support two rank 1 inputs.
+ * So, we need to add Reshape Ops if necessary.
  * In two cases, FullyConnected (input_1's shape is [n, k]) is used instead of MatMul without extra Transpose Op:
- * 1. input_1 is 2D initializer.
- * 2. input_1 is 1D tensor.
+ * 1. input_1 is a rank 2 initializer.
+ * 2. input_1 is a rank 1 tensor.
  */
 class MatMulOpBuilder : public BaseOpBuilder {
  public:
@@ -31,109 +32,207 @@ class MatMulOpBuilder : public BaseOpBuilder {
   Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
                                      std::vector<std::string>&& input_names, const logging::Logger& logger,
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
+ private:
+  Status ProcessInputsForQnnMatMul(QnnModelWrapper& qnn_model_wrapper,
+                                   const NodeUnit& node_unit,
+                                   const TensorInfo& input_info_0,
+                                   const TensorInfo& input_info_1,
+                                   const logging::Logger& logger,
+                                   std::vector<std::string>& input_names,
+                                   bool do_op_validation) const ORT_MUST_USE_RESULT;
+  Status ProcessInputsForQnnFullyConnected(QnnModelWrapper& qnn_model_wrapper,
+                                           const NodeUnit& node_unit,
+                                           const TensorInfo& input_info_0,
+                                           const TensorInfo& input_info_1,
+                                           const logging::Logger& logger,
+                                           std::vector<std::string>& input_names,
+                                           bool do_op_validation) const ORT_MUST_USE_RESULT;
 };
 
 namespace {
 
+// Inserts a QNN Convert operator to convert from one quantization type (e.g., uint16) to another (e.g., uint8).
+Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper,
+                       const std::string& convert_input_name,
+                       const std::string& convert_output_name,
+                       Qnn_DataType_t input_qnn_data_type,
+                       Qnn_DataType_t output_qnn_data_type,
+                       int32_t input_offset,
+                       float input_scale,
+                       const std::vector<uint32_t>& output_shape,
+                       bool do_op_validation) {
+  // Assume input is already handled.
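+  // The block below maps the input's (scale, offset) to equivalent parameters for the output
+  // data type: first recover the real-valued range [value_min, value_max] representable under
+  // the input's quantization parameters, then derive output quantization parameters that cover
+  // that same range, so the requantization preserves the representable value range.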
+  float qmin = 0.0f;
+  float qmax = 255.0f;
+  ORT_RETURN_IF_ERROR(qnn::utils::GetQminQmax(input_qnn_data_type, qmin, qmax));
+  double value_min = qnn::utils::Dequantize(input_offset, input_scale, qmin);
+  double value_max = qnn::utils::Dequantize(input_offset, input_scale, qmax);
+  float scale = 0.0f;
+  int32_t offset = 0;
+  ORT_RETURN_IF_ERROR(qnn::utils::GetQuantParams(static_cast<float>(value_min),
+                                                 static_cast<float>(value_max),
+                                                 output_qnn_data_type,
+                                                 scale,
+                                                 offset));
+
+  std::vector<uint32_t> output_shape_copy = output_shape;
+  QnnTensorWrapper convert_output_tensorwrapper(convert_output_name,
+                                                QNN_TENSOR_TYPE_NATIVE,
+                                                output_qnn_data_type,
+                                                QnnQuantParamsWrapper(scale, offset),
+                                                std::move(output_shape_copy));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(convert_output_tensorwrapper)), "Failed to add tensor.");
+
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(convert_output_name,
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    "Convert",
+                                                    {convert_input_name},
+                                                    {convert_output_name},
+                                                    {},
+                                                    do_op_validation),
+                    "Failed to add node.");
+  return Status::OK();
+}
+
+inline bool IsQuant16bit(Qnn_DataType_t qnn_data_type) {
+  return qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16 || qnn_data_type == QNN_DATATYPE_SFIXED_POINT_16;
+}
+
 Status CheckInputs(const QnnModelWrapper& qnn_model_wrapper, const NodeUnitIODef& input_def_0,
                    const NodeUnitIODef& input_def_1, TensorInfo& input_info_0, TensorInfo& input_info_1,
                    bool& use_fully_connected) {
   ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input_def_0, input_info_0));
   ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input_def_1, input_info_1));
 
-  // Use FullyConnected if 2nd input is 2D initializer or 1D tensor.
+#if QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR <= 20
+  // Validation crashes when using QNN FullyConnected in QNN SDK versions 2.26 - 2.27.
+  // Just use QNN MatMul for these older QNN SDK versions.
+  use_fully_connected = false;
+#else
+  // Use FullyConnected if 2nd input is a rank 2 initializer or a rank 1 tensor.
   // FullyConnected cannot pass the Op validation if keep_dims is true, so if input_0 is per-channel quantized tensor
-  // with rank > 2, it's not easy to set the quantization parameters for the output reshaped 2D tensor.
+  // with rank > 2, it's not easy to set the quantization parameters for the output reshaped rank 2 tensor.
   // In this case, we will not use FullyConnected.
   use_fully_connected =
       (input_info_1.shape.size() == 2 && input_info_1.is_initializer) || input_info_1.shape.size() == 1;
   use_fully_connected =
       use_fully_connected && !(input_info_0.quant_param.IsPerChannel() && input_info_0.shape.size() > 2);
+  // Don't use FullyConnected if both inputs are dynamic and uint16 (quantized)
+  use_fully_connected = use_fully_connected && !(IsQuant16bit(input_info_0.qnn_data_type) &&
+                                                 !input_info_0.is_initializer &&
+                                                 IsQuant16bit(input_info_1.qnn_data_type) &&
+                                                 !input_info_1.is_initializer);
+#endif
   return Status::OK();
 }
 
-}  // namespace
+// Process input[0] for ONNX MatMul that can be translated to either a QNN MatMul or a QNN FullyConnected.
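+// A rank 1 input[0] is reshaped to rank 2 ([1, k]): an initializer is re-added directly with the
+// updated shape and quantization parameters, while a dynamic tensor gets an explicit Reshape node.
+// Any other input[0] is added as-is (skipped if its tensor wrapper already exists in the graph).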
+Status ProcessInput0(QnnModelWrapper& qnn_model_wrapper,
+                     const TensorInfo& input_0_info,
+                     const std::string& original_input_0_name,
+                     std::vector<std::string>& input_names,
+                     const logging::Logger& logger,
+                     bool do_op_validation) {
+  bool reshape_input_0 = input_0_info.shape.size() == 1;
+  std::string actual_input_0_name = original_input_0_name;
 
-Status MatMulOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
-                                      const logging::Logger& logger, std::vector<std::string>& input_names,
-                                      bool do_op_validation) const {
-  const auto& inputs = node_unit.Inputs();
-  TensorInfo input_info_0{};
-  TensorInfo input_info_1{};
-  bool use_fully_connected = false;
-  ORT_RETURN_IF_ERROR(
-      CheckInputs(qnn_model_wrapper, inputs[0], inputs[1], input_info_0, input_info_1, use_fully_connected));
-  bool reshape_input_0 = input_info_0.shape.size() == 1;
-  bool reshape_input_1 = input_info_1.shape.size() == 1;
-
-  // Process input 0.
-  const std::string& org_input_0_name = inputs[0].node_arg.Name();
-  std::string input_0_name = org_input_0_name;
   if (reshape_input_0) {
-    input_0_name = org_input_0_name + "_ort_qnn_ep_reshape";
-    std::vector<uint32_t> shape_2d{1, input_info_0.shape[0]};
-    QnnQuantParamsWrapper quant_param_2d = input_info_0.quant_param.Copy();
-    ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze<uint32_t>(input_info_0.shape, shape_2d));
+    actual_input_0_name = original_input_0_name + "_ort_qnn_ep_reshape";
+    std::vector<uint32_t> shape_2d{1, input_0_info.shape[0]};
+    QnnQuantParamsWrapper quant_param_2d = input_0_info.quant_param.Copy();
+    ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze<uint32_t>(input_0_info.shape, shape_2d));
 
     // If input_0 is initializer, unpack it and add the tensor with new quantization parameter and shape.
     // Otherwise, add a Reshape node.
-    if (input_info_0.is_initializer) {
+    if (input_0_info.is_initializer) {
       std::vector<uint8_t> unpacked_tensor;
-      ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_0.initializer_tensor, unpacked_tensor));
-      Qnn_TensorType_t tensor_type = qnn_model_wrapper.GetTensorType(org_input_0_name);
-      QnnTensorWrapper input_tensorwrapper(input_0_name, tensor_type, input_info_0.qnn_data_type,
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_0_info.initializer_tensor, unpacked_tensor));
+      QnnTensorWrapper input_tensorwrapper(actual_input_0_name, QNN_TENSOR_TYPE_STATIC, input_0_info.qnn_data_type,
                                            std::move(quant_param_2d), std::move(shape_2d), std::move(unpacked_tensor));
       ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
     } else {
-      ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(org_input_0_name, input_0_name, input_info_0.shape, shape_2d,
-                                                           input_info_0.qnn_data_type, input_info_0.quant_param,
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(original_input_0_name, actual_input_0_name,
+                                                           input_0_info.shape, shape_2d,
+                                                           input_0_info.qnn_data_type, input_0_info.quant_param,
                                                            quant_param_2d, do_op_validation,
-                                                           qnn_model_wrapper.IsGraphInput(org_input_0_name), false));
+                                                           qnn_model_wrapper.IsGraphInput(original_input_0_name), false));
     }
   } else {
-    if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_0_name)) {
-      LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << input_0_name;
+    if (qnn_model_wrapper.IsQnnTensorWrapperExist(actual_input_0_name)) {
+      LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << actual_input_0_name;
     } else {
       QnnTensorWrapper input_0_tensor;
-      ORT_RETURN_IF_ERROR(qnn_model_wrapper.MakeTensorWrapper(inputs[0], input_0_tensor));
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.MakeTensorWrapper(input_0_info, actual_input_0_name, input_0_tensor));
       ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_0_tensor)), "Failed to add tensor.");
     }
   }
-  input_names.emplace_back(input_0_name);
+  input_names.emplace_back(actual_input_0_name);
+
+  return Status::OK();
+}
+}  // namespace
+
+// Process operator inputs. Dispatches to other processing functions depending on whether we're
+// translating an ONNX MatMul to a QNN MatMul or a QNN FullyConnected.
+Status MatMulOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
+                                      const logging::Logger& logger, std::vector<std::string>& input_names,
+                                      bool do_op_validation) const {
+  const auto& inputs = node_unit.Inputs();
+  TensorInfo input_info_0{};
+  TensorInfo input_info_1{};
+  bool use_fully_connected = false;
+  ORT_RETURN_IF_ERROR(
+      CheckInputs(qnn_model_wrapper, inputs[0], inputs[1], input_info_0, input_info_1, use_fully_connected));
+
+  if (use_fully_connected) {
+    return ProcessInputsForQnnFullyConnected(qnn_model_wrapper,
+                                             node_unit,
+                                             input_info_0,
+                                             input_info_1,
+                                             logger,
+                                             input_names,
+                                             do_op_validation);
+  }
+  return ProcessInputsForQnnMatMul(qnn_model_wrapper,
+                                   node_unit,
+                                   input_info_0,
+                                   input_info_1,
+                                   logger,
+                                   input_names,
+                                   do_op_validation);
+}
+
+Status MatMulOpBuilder::ProcessInputsForQnnMatMul(QnnModelWrapper& qnn_model_wrapper,
+                                                  const NodeUnit& node_unit,
+                                                  const TensorInfo& input_info_0,
+                                                  const TensorInfo& input_info_1,
+                                                  const logging::Logger& logger,
+                                                  std::vector<std::string>& input_names,
+                                                  bool do_op_validation) const {
+  const auto& inputs = node_unit.Inputs();
+  const bool reshape_input_1 = input_info_1.shape.size() == 1;
+
+  const std::string& org_input_0_name = inputs[0].node_arg.Name();
+  ORT_RETURN_IF_ERROR(ProcessInput0(qnn_model_wrapper, input_info_0, org_input_0_name, input_names,
+                                    logger, do_op_validation));
 
   // Process input 1.
   const std::string& org_input_1_name = inputs[1].node_arg.Name();
   std::string input_1_name = org_input_1_name;
-  if (reshape_input_1 || use_fully_connected) {
+  if (reshape_input_1) {
+    // Input[1] is a rank 1 tensor that needs to be reshaped.
     std::vector<uint32_t> shape_2d;
     QnnQuantParamsWrapper quant_param_2d = input_info_1.quant_param.Copy();
-    if (reshape_input_1) {
-      // Input is 1D tensor.
-      input_1_name = org_input_1_name + "_ort_qnn_ep_reshape";
-      if (use_fully_connected) {
-        // FullyConnected requires input_1's shape to be [n, k].
-        shape_2d = {1, input_info_1.shape[0]};
-      } else {
-        shape_2d = {input_info_1.shape[0], 1};
-      }
-      ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze<uint32_t>(input_info_1.shape, shape_2d));
-    } else {
-      input_1_name = org_input_1_name + "_ort_qnn_ep_transpose";
-      shape_2d = {input_info_1.shape[1], input_info_1.shape[0]};
-      ORT_RETURN_IF_ERROR(quant_param_2d.HandleTranspose<size_t>(std::vector<size_t>({1, 0})));
-    }
+    input_1_name = org_input_1_name + "_ort_qnn_ep_reshape";
+    shape_2d = {input_info_1.shape[0], 1};
+    ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze<uint32_t>(input_info_1.shape, shape_2d));
 
     // If input_1 is initializer, unpack it and add the tensor with new quantization parameter and shape.
     // Otherwise, add a Reshape node.
     if (input_info_1.is_initializer) {
       std::vector<uint8_t> unpacked_tensor;
-      if (use_fully_connected && !reshape_input_1) {
-        // 2D initializer should be transposed to [n, k].
-        ORT_RETURN_IF_ERROR(TwoDimensionTranspose(qnn_model_wrapper, input_info_1.shape,
-                                                  *input_info_1.initializer_tensor, unpacked_tensor));
-      } else {
-        ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_1.initializer_tensor, unpacked_tensor));
-      }
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_1.initializer_tensor, unpacked_tensor));
       Qnn_TensorType_t tensor_type = qnn_model_wrapper.GetTensorType(org_input_1_name);
       QnnTensorWrapper input_tensorwrapper(input_1_name, tensor_type, input_info_1.qnn_data_type,
@@ -156,6 +255,108 @@ Status MatMulOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const
   }
   input_names.emplace_back(input_1_name);
 
+  // Workaround that inserts a QNN Convert op before input[1] (converts from quantized uint16 to quantized uint8)
+  // to avoid a QNN validation failure.
+  //
+  // QNN graph WITHOUT workaround (fails validation):
+  //     input_0_uint16 ---> MatMul ---> output_uint16
+  //                           ^
+  //                           |
+  //     input_1_uint16 -------+
+  //
+  // QNN graph WITH workaround (passes validation):
+  //     input_0_uint16 ------------------------> MatMul ---> output_uint16
+  //                                                ^
+  //                                                |
+  //     input_1_uint16 --> Convert(to uint8) -----+
+  if (!input_info_0.is_initializer && !input_info_1.is_initializer &&
+      input_info_0.qnn_data_type == input_info_1.qnn_data_type &&
+      input_info_0.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) {
+    ORT_RETURN_IF_NOT(input_info_1.quant_param.IsPerTensor(),
+                      "MatMul's activation inputs only support per-tensor quantization");
+    const Qnn_QuantizeParams_t& quant_param = input_info_1.quant_param.Get();
+    // insert Convert op after input1
+    std::string convert_input_name = input_names.back();
+    input_names.pop_back();
+    const std::string& matmul_output_name = node_unit.Outputs()[0].node_arg.Name();
+    std::string convert_output_name = convert_input_name + "_convert_" + matmul_output_name;
+    std::vector<uint32_t> input_1_shape = input_info_1.shape;
+    if (reshape_input_1) {
+      input_1_shape = {input_info_1.shape[0], 1};
+    }
+    ORT_RETURN_IF_ERROR(InsertConvertOp(qnn_model_wrapper,
+                                        convert_input_name,
+                                        convert_output_name,
+                                        input_info_1.qnn_data_type,
+                                        QNN_DATATYPE_UFIXED_POINT_8,
+                                        quant_param.scaleOffsetEncoding.offset,
+                                        quant_param.scaleOffsetEncoding.scale,
+                                        input_1_shape,
+                                        do_op_validation));
+    input_names.push_back(convert_output_name);
+  }
+
   return Status::OK();
 }
 
+Status MatMulOpBuilder::ProcessInputsForQnnFullyConnected(QnnModelWrapper& qnn_model_wrapper,
+                                                          const NodeUnit& node_unit,
+                                                          const TensorInfo& input_info_0,
+                                                          const TensorInfo& input_info_1,
+                                                          const logging::Logger& logger,
+                                                          std::vector<std::string>& input_names,
+                                                          bool do_op_validation) const {
+  const auto& inputs = node_unit.Inputs();
+  const bool reshape_input_1 = input_info_1.shape.size() == 1;
+
+  const std::string& org_input_0_name = inputs[0].node_arg.Name();
+  ORT_RETURN_IF_ERROR(ProcessInput0(qnn_model_wrapper, input_info_0, org_input_0_name, input_names,
+                                    logger, do_op_validation));
+
+  // Process input 1.
+  const std::string& org_input_1_name = inputs[1].node_arg.Name();
+  std::string input_1_name = org_input_1_name;
+  std::vector<uint32_t> shape_2d;
+  QnnQuantParamsWrapper quant_param_2d = input_info_1.quant_param.Copy();
+  if (reshape_input_1) {
+    // Input[1] is a rank 1 tensor that needs to be reshaped.
+    input_1_name = org_input_1_name + "_ort_qnn_ep_reshape";
+
+    // FullyConnected requires input_1's shape to be [n, k].
+    shape_2d = {1, input_info_1.shape[0]};
+    ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze<uint32_t>(input_info_1.shape, shape_2d));
+  } else {
+    assert(input_info_1.shape.size() == 2);
+    input_1_name = org_input_1_name + "_ort_qnn_ep_transpose";
+    shape_2d = {input_info_1.shape[1], input_info_1.shape[0]};
+    ORT_RETURN_IF_ERROR(quant_param_2d.HandleTranspose<size_t>(std::vector<size_t>({1, 0})));
+  }
+
+  // If input_1 is initializer, unpack it and add the tensor with new quantization parameter and shape.
+  // Otherwise, add a Reshape node.
+  if (input_info_1.is_initializer) {
+    std::vector<uint8_t> unpacked_tensor;
+    if (!reshape_input_1) {
+      // 2D initializer should be transposed to [n, k].
+      std::vector<uint32_t> original_shape_copy = input_info_1.shape;
+      ORT_RETURN_IF_ERROR(TwoDimensionTranspose(qnn_model_wrapper,
+                                                original_shape_copy,  // Will be modified to new shape (unnecessary)
+                                                *input_info_1.initializer_tensor,
+                                                unpacked_tensor));
+    } else {
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_1.initializer_tensor, unpacked_tensor));
+    }
+
+    Qnn_TensorType_t tensor_type = qnn_model_wrapper.GetTensorType(org_input_1_name);
+    QnnTensorWrapper input_tensorwrapper(input_1_name, tensor_type, input_info_1.qnn_data_type,
+                                         std::move(quant_param_2d), std::move(shape_2d), std::move(unpacked_tensor));
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+  } else {
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(org_input_1_name, input_1_name, input_info_1.shape, shape_2d,
+                                                         input_info_1.qnn_data_type, input_info_1.quant_param,
+                                                         quant_param_2d, do_op_validation,
+                                                         qnn_model_wrapper.IsGraphInput(org_input_1_name), false));
+  }
+  input_names.emplace_back(input_1_name);
 
   return Status::OK();
 }
 
@@ -172,6 +373,24 @@ Status MatMulOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
   bool reshape_input_1 = input_info_1.shape.size() == 1;
   bool reshape_output = reshape_input_0 || reshape_input_1 || (use_fully_connected && input_info_0.shape.size() > 2);
 
+  // For QNN MatMul: set the input transpose parameters to their default values of 0. These parameters should be
+  // optional, but older versions of QNN SDK failed validation if not explicitly provided.
+  std::vector<std::string> param_tensor_names;
+  if (!use_fully_connected) {
+    Qnn_Scalar_t scalar_param = QNN_SCALAR_INIT;
+    scalar_param.dataType = QNN_DATATYPE_BOOL_8;
+    scalar_param.bool8Value = 0;
+    QnnParamWrapper transpose_in0_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN0,
+                                        scalar_param);
+    param_tensor_names.push_back(transpose_in0_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(transpose_in0_param));
+
+    QnnParamWrapper transpose_in1_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1,
+                                        scalar_param);
+    param_tensor_names.push_back(transpose_in1_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(transpose_in1_param));
+  }
+
   const std::string& org_output_name = node_unit.Outputs()[0].node_arg.Name();
   std::string op_output_name = org_output_name;
   TensorInfo output_info{};
@@ -207,7 +426,8 @@ Status MatMulOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
                     "Failed to add output tensor.");
   ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(utils::GetNodeName(node_unit), QNN_OP_PACKAGE_NAME_QTI_AISW,
                                                     use_fully_connected ? QNN_OP_FULLY_CONNECTED : QNN_OP_MAT_MUL,
-                                                    std::move(input_names), {op_output_name}, {}, do_op_validation),
+                                                    std::move(input_names), {op_output_name},
+                                                    std::move(param_tensor_names), do_op_validation),
                     "Failed to add fused Matmul node.");
 
   if (reshape_output) {
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
index a6c4203ad92e4..9902617b71596 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -22,11 +22,6 @@ class SimpleOpBuilder : public BaseOpBuilder {
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SimpleOpBuilder);
 
  protected:
-  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
-                       const NodeUnit& node_unit,
-                       const logging::Logger& logger,
-                       std::vector<std::string>& input_names,
-                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
   Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                      const NodeUnit& node_unit,
                                      std::vector<std::string>&& input_names,
@@ -53,91 +48,6 @@ class SimpleOpBuilder : public BaseOpBuilder {
   static constexpr std::array gridsample_supported_padding_modes = {"zeros", "border", "reflection"};
 };
 
-// Move to qnn_utils if it's re-usable
-Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper,
-                       const std::string& convert_input_name,
-                       const std::string& convert_output_name,
-                       Qnn_DataType_t input_qnn_data_type,
-                       Qnn_DataType_t output_qnn_data_type,
-                       int32_t input_offset,
-                       float input_scale,
-                       const std::vector<uint32_t>& output_shape,
-                       bool do_op_validation) {
-  // Assume input is already handled.
-  float qmin = 0.0f;
-  float qmax = 255.0f;
-  ORT_RETURN_IF_ERROR(qnn::utils::GetQminQmax(input_qnn_data_type, qmin, qmax));
-  double value_min = qnn::utils::Dequantize(input_offset, input_scale, qmin);
-  double value_max = qnn::utils::Dequantize(input_offset, input_scale, qmax);
-  float scale = 0.0f;
-  int32_t offset = 0;
-  ORT_RETURN_IF_ERROR(qnn::utils::GetQuantParams(static_cast<float>(value_min),
-                                                 static_cast<float>(value_max),
-                                                 output_qnn_data_type,
-                                                 scale,
-                                                 offset));
-
-  std::vector<uint32_t> output_shape_copy = output_shape;
-  QnnTensorWrapper convert_output_tensorwrapper(convert_output_name,
-                                                QNN_TENSOR_TYPE_NATIVE,
-                                                output_qnn_data_type,
-                                                QnnQuantParamsWrapper(scale, offset),
-                                                std::move(output_shape_copy));
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(convert_output_tensorwrapper)), "Failed to add tensor.");
-
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(convert_output_name,
-                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                    "Convert",
-                                                    {convert_input_name},
-                                                    {convert_output_name},
-                                                    {},
-                                                    do_op_validation),
-                    "Failed to add node.");
-  return Status::OK();
-}
-
-Status SimpleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
-                                      const NodeUnit& node_unit,
-                                      const logging::Logger& logger,
-                                      std::vector<std::string>& input_names,
-                                      bool do_op_validation) const {
-  const std::string& op_type = node_unit.OpType();
-  ORT_RETURN_IF_ERROR(BaseOpBuilder::ProcessInputs(qnn_model_wrapper, node_unit, logger, input_names, do_op_validation));
-
-  if (op_type == "MatMul") {
-    const auto& inputs = node_unit.Inputs();
-    TensorInfo input0_info = {};
-    TensorInfo input1_info = {};
-    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
-    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input1_info));
-    // Need to insert Convert op if both inputs are dynamic inputs and are ufixed_16
-    if (!input0_info.is_initializer && !input1_info.is_initializer &&
-        input0_info.qnn_data_type == input1_info.qnn_data_type &&
-        input0_info.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) {
-      ORT_RETURN_IF_NOT(input1_info.quant_param.IsPerTensor(),
-                        "MatMul's activation inputs only support per-tensor quantization");
-      const Qnn_QuantizeParams_t& quant_param = input1_info.quant_param.Get();
-      // insert Convert op after input1
-      std::string convert_input_name = input_names.back();
-      input_names.pop_back();
-      const std::string& matmul_output_name = node_unit.Outputs()[0].node_arg.Name();
-      std::string convert_output_name = convert_input_name + "_convert_" + matmul_output_name;
-      ORT_RETURN_IF_ERROR(InsertConvertOp(qnn_model_wrapper,
-                                          convert_input_name,
-                                          convert_output_name,
-                                          input1_info.qnn_data_type,
-                                          QNN_DATATYPE_UFIXED_POINT_8,
-                                          quant_param.scaleOffsetEncoding.offset,
-                                          quant_param.scaleOffsetEncoding.scale,
-                                          input1_info.shape,
-                                          do_op_validation));
-      input_names.push_back(convert_output_name);
-    }
-  }
-
-  return Status::OK();
-}
-
 Status SimpleOpBuilder::ExplicitOpCheck(QnnModelWrapper& qnn_model_wrapper,
                                         const NodeUnit& node_unit) const {
   const std::string& op_type = node_unit.OpType();
@@ -378,19 +288,6 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
     ORT_RETURN_IF(norm_p_order != 2, "QNN EP only supports LpNormalization with 'p' attribute equal to 2.");
   }
 
-  if (op_type == "MatMul") {
-    Qnn_Scalar_t scalar_param = QNN_SCALAR_INIT;
-    scalar_param.dataType = QNN_DATATYPE_BOOL_8;
-    scalar_param.bool8Value = 0;
-    QnnParamWrapper transpose_in0_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN0, scalar_param);
-    param_tensor_names.push_back(transpose_in0_param.GetParamTensorName());
-    qnn_model_wrapper.AddParamWrapper(std::move(transpose_in0_param));
-
-    QnnParamWrapper transpose_in1_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar_param);
-    param_tensor_names.push_back(transpose_in1_param.GetParamTensorName());
-    qnn_model_wrapper.AddParamWrapper(std::move(transpose_in1_param));
-  }
-
   if (op_type == "LeakyRelu") {
     std::string input_name = "alpha";
     ORT_RETURN_IF_ERROR(ProcessAlphaAttributeAsInput(qnn_model_wrapper, node_unit, input_name));
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
index 9147bade3b1e7..79f8f176a2e76 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
@@ -75,6 +75,20 @@ Status QnnModelWrapper::MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensor
   return Status::OK();
 }
 
+Status QnnModelWrapper::MakeTensorWrapper(const TensorInfo& tensor_info,
+                                          const std::string& tensor_name,
+                                          QnnTensorWrapper& tensor_wrapper) const {
+  std::vector<uint8_t> unpacked_tensor;
+  if (tensor_info.is_initializer) {
+    ORT_RETURN_IF_ERROR(UnpackInitializerData(*tensor_info.initializer_tensor, unpacked_tensor));
+  }
+
+  tensor_wrapper = QnnTensorWrapper(tensor_name, GetTensorType(tensor_name), tensor_info.qnn_data_type,
+                                    tensor_info.quant_param.Copy(), std::vector<uint32_t>(tensor_info.shape),
+                                    std::move(unpacked_tensor));
+  return Status::OK();
+}
+
 bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) {
   // Keep a copy of tensor name sine it will be moved with the wrapper into model_tensors_map_
   std::string tensor_name = tensor_wrapper.GetName();
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
index e24d4939dd201..d018ca12d6451 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
@@ -66,6 +66,9 @@ class QnnModelWrapper {
 
   // Make a QnnTensorWrapper from an onnx input or output.
   Status MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensorWrapper& tensor_wrapper) const;
+  Status MakeTensorWrapper(const TensorInfo& tensor_info,
+                           const std::string& tensor_name,
+                           QnnTensorWrapper& tensor_wrapper) const;
 
   // Add to internal tensor wrapper table
   bool AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper);
diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp
index c6cbf0236c6ec..4b2aee5f6a14f 100644
--- a/onnxruntime/test/providers/qnn/matmul_test.cpp
+++ b/onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -290,10 +290,65 @@ TEST_F(QnnHTPBackendTests, MatMulOp_QDQ) {
   RunQDQPerChannelMatMulOpTest<uint8_t, int8_t, uint8_t>({2, 3, 3, 3}, {3, 2}, -1, QDQTolerance(),
                                                          ExpectedEPNodeAssignment::All, 18, true);
 
-  // // UINT16, per-channel INT8 weight
+  // UINT16, per-channel INT8 weight
   RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3}, {3, 2}, 1, QDQTolerance(),
                                                            ExpectedEPNodeAssignment::All, 21, false, false);
-  RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3, 3}, {3}, -1);
+  RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3, 3}, {3}, -1, QDQTolerance(0.0041f));
+}
+
+// Tests MatMul with two uint16 (quantized) inputs that are both dynamic.
+// This exercises a workaround in QNN EP that inserts a QNN Convert op before input[1] (converts from uint16 to uint8).
+// This workaround prevents a validation error for this specific MatMul configuration.
+// Got specific shapes and input ranges (quant params) from customer model.
+TEST_F(QnnHTPBackendTests, MatMulOp_QDQ_Regression_uint16_dynamic_inputs) {
+  ProviderOptions provider_options;
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+
+  // Test with rank 4 inputs
+  {
+    std::vector<int64_t> shape_0 = {1, 12, 512, 96};
+    TestInputDef<float> input0_def(
+        {1, 12, 512, 96}, false,
+        GetFloatDataInRange(-5.087f, 4.992f,
+                            static_cast<size_t>(std::accumulate(shape_0.begin(), shape_0.end(),
+                                                                static_cast<int64_t>(1),
+                                                                std::multiplies<int64_t>()))));
+    std::vector<int64_t> shape_1 = {1, 12, 96, 512};
+    TestInputDef<float> input1_def(
+        shape_1, false,
+        GetFloatDataInRange(-6.772f, 7.258f,
+                            static_cast<size_t>(std::accumulate(shape_1.begin(), shape_1.end(),
+                                                                static_cast<int64_t>(1),
+                                                                std::multiplies<int64_t>()))));
+
+    TestQDQModelAccuracy(
+        BuildMatMulOpTestCase(input0_def, input1_def),
+        BuildMatMulOpQDQTestCase<uint16_t, uint16_t, uint16_t>(input0_def, input1_def, false),
+        provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance());
+  }
+
+  // Test with input[1] as rank 1
+  {
+    std::vector<int64_t> shape_0 = {1, 12, 512, 96};
+    TestInputDef<float> input0_def(
+        {1, 12, 512, 96}, false,
+        GetFloatDataInRange(-5.087f, 4.992f,
+                            static_cast<size_t>(std::accumulate(shape_0.begin(), shape_0.end(),
+                                                                static_cast<int64_t>(1),
+                                                                std::multiplies<int64_t>()))));
+    std::vector<int64_t> shape_1 = {96};
+    TestInputDef<float> input1_def(
+        shape_1, false,
+        GetFloatDataInRange(-6.772f, 7.258f,
+                            static_cast<size_t>(std::accumulate(shape_1.begin(), shape_1.end(),
+                                                                static_cast<int64_t>(1),
+                                                                std::multiplies<int64_t>()))));
+
+    TestQDQModelAccuracy(
+        BuildMatMulOpTestCase(input0_def, input1_def),
+        BuildMatMulOpQDQTestCase<uint16_t, uint16_t, uint16_t>(input0_def, input1_def, false),
+        provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance());
+  }
 }
 
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
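
Note on the Convert workaround's requantization: InsertConvertOp derives the uint8
quantization parameters by dequantizing the endpoints of the uint16 representable
range and requantizing that same float range for uint8. Below is a minimal standalone
sketch of that arithmetic, not part of the patch; the scale/offset values are
hypothetical, and it assumes QNN's affine convention value = scale * (q + offset),
which is what the qnn::utils::Dequantize/GetQuantParams helpers above implement.

// sketch_requantize.cc -- illustrative only, not ORT or QNN SDK code.
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical per-tensor quant params of a uint16 activation.
  const float in_scale = 0.0002f;
  const int32_t in_offset = -32768;  // assumed QNN-style offset (negated zero point)

  // Dequantize the endpoints of the uint16 representable range [0, 65535].
  const double value_min = static_cast<double>(in_scale) * (0.0 + in_offset);
  const double value_max = static_cast<double>(in_scale) * (65535.0 + in_offset);

  // Requantize the same float range onto uint8 [0, 255].
  const float out_scale = static_cast<float>((value_max - value_min) / 255.0);
  const int32_t out_offset = static_cast<int32_t>(std::lround(value_min / out_scale));

  // Round-trip check: the uint8 params cover the same range with a ~256x coarser step.
  std::printf("uint8: scale=%g offset=%d covers [%g, %g]\n", out_scale, out_offset,
              out_scale * (0.0 + out_offset), out_scale * (255.0 + out_offset));
  return 0;
}

The coarser uint8 step is the accuracy cost of the workaround, which is consistent
with the new regression test validating the QDQ model against a tolerance rather
than exact output equality.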