[onnxruntime/build] Add new flag enable_generic_interface to build primary EPs by default #23342

Merged: 26 commits, Jan 28, 2025

Commits (26)
bedddca
add enable_tensorrt_interface and build ort.so only
jslhcl Dec 13, 2024
3cbbbf3
add enable_openvino_interface parameter in build option and e2e test,…
jslhcl Dec 16, 2024
6e2f5d5
fix the runtime error and now trt and openvino can run together
jslhcl Dec 17, 2024
18950fb
Experimental changes to remove the IHV SDK dependencies
Jan 10, 2025
17bceec
Incorporate code review comments and simplify the changes
Jan 10, 2025
558df87
Remove unnecessary comments
Jan 10, 2025
80c2c62
Fix cmake toolset settings
Jan 11, 2025
13952af
Fix toolset settings in CMake
Jan 13, 2025
b971005
fix comments
jslhcl Jan 14, 2025
f53f373
Fixing lintrunner warnings ( lintrunner -a)
Jan 14, 2025
18984d8
Few more issues
Jan 15, 2025
720280f
Fix Pipeline issues with QNN
Jan 15, 2025
73c175c
Incorporate code review comments
Jan 18, 2025
f8b75b2
fix function definition
Jan 21, 2025
8210cd0
fix python syntax
Jan 21, 2025
2115536
Deleted test files
Jan 21, 2025
955bbae
Fix build interface logic
Jan 21, 2025
7cfa2e2
fix code comments after rebase
Jan 22, 2025
c8ae7e6
Revert "accidental cmake/external/onnx changes"
Jan 22, 2025
bb115e5
Avoid building qnn-ep when generic interface used
Jan 22, 2025
1a4f582
Add explicit build error when qnn-ep is built as a static library
Jan 22, 2025
9857e34
Fix codeQL warnings
Jan 23, 2025
9f52af7
Remove commented code and enable interface for all eps by default
Jan 24, 2025
6761a93
Update tools/ci_build/build.py
karim-vad Jan 24, 2025
453a17a
Add CI pipeline build step for Windows
Jan 27, 2025
2bb2a3a
Merge remote-tracking branch 'origin/main' into dev/kvadsariya/wcr
Jan 28, 2025
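At a high level, the new `--enable_generic_interface` build flag configures the ONNX Runtime shared library with the provider-bridge interfaces for the primary EPs (TensorRT, CUDA, OpenVINO, VitisAI, QNN) compiled in, without building those EPs themselves and without building unit tests. As a rough illustration only (the build directory, configuration, and extra arguments are assumptions, not part of this PR), a local build could be driven like this:

```python
# Minimal sketch of invoking the build driver with the new flag.
# Only --enable_generic_interface comes from this PR; the other
# arguments are assumed defaults for a local build.
import subprocess
import sys

subprocess.run(
    [
        sys.executable,
        "tools/ci_build/build.py",
        "--build_dir", "build",           # assumed output location
        "--config", "RelWithDebInfo",     # matches the CI stage added below
        "--parallel",
        "--build",
        "--enable_generic_interface",     # shared lib + EP bridges only; tests are skipped
    ],
    check=True,
)
```

This mirrors the new CI stage in win-ci-pipeline.yml, which passes the flag through additionalBuildFlags and disables the test run.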
24 changes: 15 additions & 9 deletions cmake/CMakeLists.txt
@@ -259,6 +259,12 @@ option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF)
option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF)
option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF)

option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF)
option(onnxruntime_USE_CUDA_INTERFACE "Build ONNXRuntime shared lib which is compatible with Cuda EP interface" OFF)
option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF)
option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF)
option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF)

# ENABLE_TRAINING includes all training functionality
# The following 2 entry points
# 1. ORTModule
@@ -703,7 +709,7 @@ if (WIN32)
# structure was padded due to __declspec(align())
list(APPEND ORT_WARNING_FLAGS "/wd4324")
# warning C4800: Implicit conversion from 'X' to bool. Possible information loss
if (onnxruntime_USE_OPENVINO)
if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)
list(APPEND ORT_WARNING_FLAGS "/wd4800")
endif()
# operator 'operator-name': deprecated between enumerations of different types
@@ -864,7 +870,7 @@ else()
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()

if (onnxruntime_USE_CUDA)
if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda)
@@ -888,7 +894,7 @@ if (onnxruntime_USE_CUDA)
endif()
endif()

if (onnxruntime_USE_VITISAI)
if (onnxruntime_USE_VITISAI OR onnxruntime_USE_VITISAI_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_VITISAI=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VITISAI=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai)
@@ -898,12 +904,12 @@ if (onnxruntime_USE_DNNL)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DNNL=1)
endif()
if (onnxruntime_USE_OPENVINO)
if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_OPENVINO=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_OPENVINO=1)
endif()
if (onnxruntime_USE_TENSORRT)
if (onnxruntime_USE_TENSORRT OR onnxruntime_USE_TENSORRT_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_TENSORRT=1)
#TODO: remove the following line and change the test code in onnxruntime_shared_lib_test to use the new EP API.
list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt)
@@ -929,7 +935,7 @@ if (onnxruntime_USE_JSEP)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES js)
endif()
if (onnxruntime_USE_QNN)
if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn)
@@ -957,7 +963,7 @@ if (onnxruntime_USE_QNN)
endif()
endif()

if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
if ((NOT onnxruntime_USE_QNN_INTERFACE) AND (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux"))
file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libQnn*.so"
"${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/Qnn*.dll"
"${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libHtpPrepare.so"
@@ -1416,7 +1422,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS)
)
endif()

if (onnxruntime_USE_OPENVINO)
if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)

add_definitions(-DUSE_OPENVINO=1)

@@ -1429,7 +1435,7 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_GPU=1)
endif()

if (onnxruntime_USE_OPENVINO_CPU)
if (onnxruntime_USE_OPENVINO_CPU OR onnxruntime_USE_OPENVINO_INTERFACE) # OpenVino CPU interface is default built.
add_definitions(-DOPENVINO_CONFIG_CPU=1)
endif()

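The five `onnxruntime_USE_*_INTERFACE` options above are plain CMake cache options, so they can also be set in a direct configure. The sketch below is a hand-written approximation of the defines that build.py emits when `--enable_generic_interface` is passed (see the build.py diff further down); the source and build paths are assumptions, and a real configure still needs the rest of the options the build driver normally supplies.

```python
# Approximate equivalent of the -D flags build.py adds for
# --enable_generic_interface. Paths are assumptions; a real configure
# needs the remaining options the build driver normally passes.
import subprocess

interface_eps = ["TENSORRT", "CUDA", "OPENVINO", "VITISAI", "QNN"]
defines = [f"-Donnxruntime_USE_{ep}_INTERFACE=ON" for ep in interface_eps]

subprocess.run(
    ["cmake", "-S", "cmake", "-B", "build", *defines],  # CMakeLists.txt lives under cmake/
    check=True,
)
```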
@@ -178,7 +178,6 @@ struct ProviderHost {
virtual std::string demangle(const char* name) = 0;
virtual std::string demangle(const std::string& name) = 0;

#ifdef USE_CUDA
virtual std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(const char* name) = 0;
virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() = 0;
@@ -190,7 +189,6 @@ struct ProviderHost {

virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0;
virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0;
#endif

#ifdef USE_MIGRAPHX
virtual std::unique_ptr<IAllocator> CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0;
@@ -200,7 +198,6 @@ struct ProviderHost {
#ifdef USE_ROCM
virtual std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(const char* name) = 0;
virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() = 0;

virtual void rocm__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) = 0;
virtual void rocm__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) = 0;
@@ -1256,9 +1253,7 @@ struct ProviderHost {
virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0;
#endif

#if defined(USE_CUDA) || defined(USE_ROCM)
virtual PhiloxGenerator& PhiloxGenerator__Default() = 0;
#endif

#ifdef ENABLE_TRAINING_TORCH_INTEROP
virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0;
7 changes: 2 additions & 5 deletions onnxruntime/core/session/provider_bridge_ort.cc
@@ -258,10 +258,8 @@ struct ProviderHostImpl : ProviderHost {
void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); }
void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); }

#ifdef USE_CUDA
std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); }
std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); }
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); }

void cuda__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); }
void cuda__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); }
@@ -271,7 +269,6 @@ struct ProviderHostImpl : ProviderHost {

Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); }
void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); }
#endif

#ifdef USE_MIGRAPHX
std::unique_ptr<IAllocator> CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); }
@@ -291,6 +288,8 @@ struct ProviderHostImpl : ProviderHost {

Status RocmCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_ROCM().RocmCall_false(retCode, exprString, libName, successCode, msg, file, line); }
void RocmCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_ROCM().RocmCall_true(retCode, exprString, libName, successCode, msg, file, line); }
#else
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); }
#endif

std::string GetEnvironmentVar(const std::string& var_name) override { return Env::Default().GetEnvironmentVar(var_name); }
@@ -1560,9 +1559,7 @@ struct ProviderHostImpl : ProviderHost {
training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); }
#endif

#if defined(USE_CUDA) || defined(USE_ROCM)
PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); }
#endif

#ifdef ENABLE_TRAINING_TORCH_INTEROP
void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); }
40 changes: 35 additions & 5 deletions tools/ci_build/build.py
@@ -782,6 +782,12 @@
parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels")
parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.")

parser.add_argument(
"--enable_generic_interface",
action="store_true",
help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests",
)

if not is_windows():
parser.add_argument(
"--allow_running_as_root",
@@ -1042,6 +1048,12 @@
"-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),
"-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER="
+ ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"),
# interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs
"-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_CUDA_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
# set vars for migraphx
"-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"),
"-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"),
Expand Down Expand Up @@ -1372,6 +1384,8 @@
cmake_args += ["-Donnxruntime_BUILD_QNN_EP_STATIC_LIB=ON"]
if args.android and args.use_qnn != "static_lib":
raise BuildError("Only support Android + QNN builds with QNN EP built as a static library.")
if args.use_qnn == "static_lib" and args.enable_generic_interface:
raise BuildError("Generic ORT interface only supported with QNN EP built as a shared library.")

if args.use_coreml:
cmake_args += ["-Donnxruntime_USE_COREML=ON"]
@@ -1529,6 +1543,12 @@
"-Donnxruntime_USE_FULL_PROTOBUF=ON",
]

# When this flag is enabled, that means we only build ONNXRuntime shared library, expecting some compatible EP
# shared lib being built in a separate process. So we skip the test for now as ONNXRuntime shared lib built under
# this flag is not expected to work alone
if args.enable_generic_interface:
cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"]

if args.enable_lazy_tensor:
import torch

@@ -2649,6 +2669,9 @@
# Disable ONNX Runtime's builtin memory checker
args.disable_memleak_checker = True

if args.enable_generic_interface:
args.test = False

# If there was no explicit argument saying what to do, default
# to update, build and test (for native builds).
if not (args.update or args.clean or args.build or args.test or args.gen_doc):
@@ -2752,7 +2775,10 @@
source_dir = os.path.normpath(os.path.join(script_dir, "..", ".."))

# if using cuda, setup cuda paths and env vars
cuda_home, cudnn_home = setup_cuda_vars(args)
cuda_home = ""
cudnn_home = ""
if args.use_cuda:
cuda_home, cudnn_home = setup_cuda_vars(args)

mpi_home = args.mpi_home
nccl_home = args.nccl_home
@@ -2765,10 +2791,14 @@
armnn_home = args.armnn_home
armnn_libs = args.armnn_libs

qnn_home = args.qnn_home
qnn_home = ""
if args.use_qnn:
qnn_home = args.qnn_home

# if using tensorrt, setup tensorrt paths
tensorrt_home = setup_tensorrt_vars(args)
tensorrt_home = ""
if args.use_tensorrt:
tensorrt_home = setup_tensorrt_vars(args)

# if using migraphx, setup migraphx paths
migraphx_home = setup_migraphx_vars(args)
@@ -2853,9 +2883,9 @@
toolset = "host=" + host_arch + ",version=" + args.msvc_toolset
else:
toolset = "host=" + host_arch
if args.cuda_version:
if args.use_cuda and args.cuda_version:
toolset += ",cuda=" + args.cuda_version
elif args.cuda_home:
elif args.use_cuda and args.cuda_home:
toolset += ",cuda=" + args.cuda_home
if args.windows_sdk_version:
target_arch += ",version=" + args.windows_sdk_version
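The help text and the comment above spell out the intended split: a generic-interface build produces only the ONNX Runtime shared library with the EP bridge code compiled in, while the matching EP shared libraries are built separately against their vendor SDKs. Purely as a hypothetical illustration (nothing below is added by this PR; it assumes a Python package layered on such a build plus a separately built TensorRT EP library that ONNX Runtime can locate at runtime), session creation would then look the same as with a monolithic build:

```python
# Hypothetical runtime pairing, assuming a Python wheel built on top of the
# generic-interface shared library and a separately built TensorRT EP on the
# loader path. Not part of this PR.
import onnxruntime as ort

sess = ort.InferenceSession(
    "model.onnx",  # placeholder model path
    providers=["TensorrtExecutionProvider", "CPUExecutionProvider"],  # CPU fallback
)
print(sess.get_providers())
```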
19 changes: 19 additions & 0 deletions tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
@@ -177,6 +177,25 @@ stages:
WITH_CACHE: false
MachinePool: 'onnxruntime-Win-CPU-2022'

- stage: x64_release_ep_generic_interface
dependsOn: []
jobs:
- template: templates/jobs/win-ci-vs-2022-job.yml
parameters:
BuildConfig: 'RelWithDebInfo'
buildArch: x64
additionalBuildFlags: --enable_generic_interface
msbuildPlatform: x64
isX86: false
job_name_suffix: x64_release_ep_generic_interface
RunOnnxRuntimeTests: false # --enable_generic_interface does not build tests
EnablePython: false
isTraining: false
ORT_EP_NAME: CPU
GenerateDocumentation: false
WITH_CACHE: false
MachinePool: 'onnxruntime-Win-CPU-2022'

- stage: x86_release
dependsOn: []
jobs: