Skip to content

[ARM CPU] hgemm optimized for gqa #8142

[ARM CPU] hgemm optimized for gqa

[ARM CPU] hgemm optimized for gqa #8142

Workflow file for this run

name: Mac_CI
on:
push:
branches:
- main
- rel-*
pull_request:
branches:
- main
- rel-*
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
python_version: 3.11
jobs:
ARM64-Xcode16:
runs-on: macos-15
env:
xcode_version: 16
timeout-minutes: 60
steps:
- uses: actions/setup-python@v5
with:
python-version: ${{ env.python_version }}
- name: Verify ARM64 machine
shell: python
run: |
import platform
assert platform.machine() == "arm64", "This job expects to be run on an ARM64 machine."
- name: Use Xcode ${{ env.xcode_version }}
shell: bash
run: |
XCODE_DEVELOPER_DIR="/Applications/Xcode_${{ env.xcode_version }}.app/Contents/Developer"
sudo xcode-select --switch "${XCODE_DEVELOPER_DIR}"
- uses: actions/checkout@v4
- name: Build and test
shell: bash
run: |
python ./tools/ci_build/build.py \
--build_dir ./build \
--update \
--build --parallel \
--test \
--build_shared_lib \
--build_objc \
--use_coreml \
--use_xnnpack \
--use_binskim_compliant_compile_flags
ARM64-Xcode16-targeting-iphonesimulator:
runs-on: macos-15
env:
xcode_version: 16
strategy:
matrix:
target_arch: [x86_64, arm64]
timeout-minutes: 60
steps:
- uses: actions/setup-python@v5
with:
python-version: ${{ env.python_version }}
- name: Verify ARM64 machine
shell: python
run: |
import platform
assert platform.machine() == "arm64", "This job expects to be run on an ARM64 machine."
- name: Use Xcode ${{ env.xcode_version }}
shell: bash
run: |
XCODE_DEVELOPER_DIR="/Applications/Xcode_${{ env.xcode_version }}.app/Contents/Developer"
sudo xcode-select --switch "${XCODE_DEVELOPER_DIR}"
- uses: actions/checkout@v4
- name: Build for iphonesimulator ${{ matrix.target_arch }}
shell: bash
run: |
python ./tools/ci_build/build.py \
--build_dir ./build \
--update \
--build --parallel \
--test \
--build_apple_framework \
--use_xcode \
--use_coreml \
--use_xnnpack \
--use_binskim_compliant_compile_flags \
--ios \
--apple_deploy_target=15.1 \
--apple_sysroot=iphonesimulator \
--osx_arch=${{ matrix.target_arch }}
Vcpkg:
runs-on: macos-13
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ env.python_version }}
- name: "Run Homebrew"
run: brew install autoconf automake autoconf-archive
- name: "Run vcpkg(x64-osx)"
uses: lukka/run-vcpkg@v11
with:
vcpkgDirectory: "${{ runner.temp }}/vcpkg"
vcpkgGitCommitId: "b322364f06308bdd24823f9d8f03fe0cc86fd46f" # 2024.12.16
runVcpkgInstall: true # vcpkg install --x-manifest-root cmake --x-install-root .build --overlay-triplets cmake/vcpkg-triplets/default
vcpkgJsonGlob: "cmake/vcpkg.json"
vcpkgConfigurationJsonGlob: "cmake/vcpkg-configuration.json"
env:
VCPKG_INSTALLED_DIR: "${{ github.workspace }}/.build"
VCPKG_DEFAULT_TRIPLET: "x64-osx"
VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/cmake/vcpkg-triplets/default"
# VCPKG_BINARY_SOURCES: "default" # https://learn.microsoft.com/en-us/vcpkg/reference/binarycaching
- name: "Run compile_schema.py"
run: |
set -e -x
# Runner's host triplet should be x64-osx or arm64-osx
export FLATC_DIR="${{ github.workspace }}/.build/${{ runner.arch }}-osx/tools/flatbuffers"
export PATH="$FLATC_DIR:$PATH"
flatc --version
python onnxruntime/core/flatbuffers/schema/compile_schema.py --flatc "$(which flatc)"
python onnxruntime/lora/adapter_format/compile_schema.py --flatc "$(which flatc)"
- name: "Detect protoc"
id: protoc-detect
run: |
export PROTOC_DIR="${{ github.workspace }}/.build/${{ runner.arch }}-osx/tools/protobuf"
export PATH="$PROTOC_DIR:$PATH"
protoc --version
echo "protoc_path=$(which protoc)" >> "$GITHUB_OUTPUT"
- name: "Run build.py(x64-osx)"
run: |
python ./tools/ci_build/build.py \
--build_dir "build/x64-osx" \
--skip_submodule_sync \
--skip_tests \
--compile_no_warning_as_error \
--parallel \
--path_to_protoc_exe "${{ steps.protoc-detect.outputs.protoc_path }}" \
--osx_arch x86_64 \
--use_vcpkg \
--cmake_extra_defines "CMAKE_TOOLCHAIN_FILE:FILEPATH=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" \
--cmake_extra_defines "VCPKG_TARGET_TRIPLET=x64-osx" \
--cmake_extra_defines "VCPKG_INSTALLED_DIR:PATH=${{ github.workspace }}/.build" \
--cmake_extra_defines "VCPKG_INSTALL_OPTIONS=--x-feature=tests"
env:
VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/cmake/vcpkg-triplets/default"
shell: bash
- name: "Run vcpkg(arm64-osx)"
uses: lukka/run-vcpkg@v11
with:
vcpkgDirectory: "${{ runner.temp }}/vcpkg"
doNotUpdateVcpkg: true
runVcpkgInstall: true # vcpkg install --x-manifest-root cmake --x-install-root .build --overlay-triplets cmake/vcpkg-triplets/default
vcpkgJsonGlob: "cmake/vcpkg.json"
vcpkgConfigurationJsonGlob: "cmake/vcpkg-configuration.json"
env:
VCPKG_INSTALLED_DIR: "${{ github.workspace }}/.build"
VCPKG_DEFAULT_TRIPLET: "arm64-osx"
VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/cmake/vcpkg-triplets/default"
# VCPKG_BINARY_SOURCES: "default" # https://learn.microsoft.com/en-us/vcpkg/reference/binarycaching
- name: "Run build.py(arm64-osx)"
run: |
python ./tools/ci_build/build.py \
--build_dir "build/arm64-osx" \
--skip_submodule_sync \
--skip_tests \
--compile_no_warning_as_error \
--parallel \
--path_to_protoc_exe "${{ steps.protoc-detect.outputs.protoc_path }}" \
--osx_arch arm64 \
--use_vcpkg \
--cmake_extra_defines "CMAKE_TOOLCHAIN_FILE:FILEPATH=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" \
--cmake_extra_defines "VCPKG_TARGET_TRIPLET=arm64-osx" \
--cmake_extra_defines "VCPKG_INSTALLED_DIR:PATH=${{ github.workspace }}/.build" \
--cmake_extra_defines "VCPKG_INSTALL_OPTIONS=--x-feature=tests"
env:
VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/cmake/vcpkg-triplets/default"
shell: bash
Objective-C-StaticAnalysis:
runs-on: macos-14
env:
xcode_version: 15.2
timeout-minutes: 30
steps:
- uses: actions/setup-python@v5
with:
python-version: ${{ env.python_version }}
- name: Use Xcode ${{ env.xcode_version }}
shell: bash
run: |
XCODE_DEVELOPER_DIR="/Applications/Xcode_${{ env.xcode_version }}.app/Contents/Developer"
sudo xcode-select --switch "${XCODE_DEVELOPER_DIR}"
- uses: actions/checkout@v4
- name: Generate compile_commands.json and ONNX protobuf files
shell: bash
run: |
python ./tools/ci_build/build.py \
--build_dir ./build \
--cmake_generator "Unix Makefiles" \
--config Debug \
--build_shared_lib \
--use_coreml \
--build_objc \
--enable_training_apis \
--cmake_extra_defines CMAKE_EXPORT_COMPILE_COMMANDS=ON \
--use_binskim_compliant_compile_flags \
--update \
--build --parallel \
--target onnx_proto
- name: Analyze Objective-C/C++ source code
shell: bash
run: |
CLANG_TIDY_CHECKS="-*,clang-analyzer-*"
"$(brew --prefix llvm@15)/bin/clang-tidy" \
-p=./build/Debug \
--checks="${CLANG_TIDY_CHECKS}" \
--warnings-as-errors="${CLANG_TIDY_CHECKS}" \
--header-filter="objectivec/include|objectivec|onnxruntime/core" \
./objectivec/*.mm \
./onnxruntime/core/platform/apple/logging/apple_log_sink.mm \
./onnxruntime/core/providers/coreml/model/*.mm