From bedddca3fe70566ccb1b5a5161d2cf4091e76af3 Mon Sep 17 00:00:00 2001 From: Lei Cao Date: Fri, 13 Dec 2024 15:36:32 +0000 Subject: [PATCH 01/25] add enable_tensorrt_interface and build ort.so only --- cmake/onnxruntime_providers.cmake | 4 ++-- tools/ci_build/build.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 67fa48b28278d..554fafe3d63c7 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -120,7 +120,7 @@ if(onnxruntime_USE_SNPE) endif() include(onnxruntime_providers_cpu.cmake) -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) include(onnxruntime_providers_cuda.cmake) endif() @@ -128,7 +128,7 @@ if (onnxruntime_USE_DNNL) include(onnxruntime_providers_dnnl.cmake) endif() -if (onnxruntime_USE_TENSORRT) +if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) include(onnxruntime_providers_tensorrt.cmake) endif() diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index bce7552854a4c..52f70a68119a7 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -782,6 +782,8 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels") parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") + parser.add_argument("--enable_tensorrt_interface", action="store_true", help="build ORT shared library and compatible bridge with tensorrt, but not TRT EP nor tests") + if not is_windows(): parser.add_argument( "--allow_running_as_root", @@ -1042,6 +1044,7 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"), "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), + "-Donnxruntime_ENABLE_TRT_INTERFACE=" + ("ON" if args.enable_tensorrt_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1529,6 +1532,9 @@ def generate_build_tree( "-Donnxruntime_USE_FULL_PROTOBUF=ON", ] + if args.enable_tensorrt_interface: + cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"] + if args.enable_lazy_tensor: import torch @@ -2649,6 +2655,9 @@ def main(): # Disable ONNX Runtime's builtin memory checker args.disable_memleak_checker = True + if args.enable_tensorrt_interface: + args.use_tensorrt, args.test = True, False + # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). if not (args.update or args.clean or args.build or args.test or args.gen_doc): From 3cbbbf3518b5dcd5c8e5977ca33baf01ea7e18ad Mon Sep 17 00:00:00 2001 From: jslhcl Date: Mon, 16 Dec 2024 07:23:29 -0800 Subject: [PATCH 02/25] add enable_openvino_interface parameter in build option and e2e test, now a runtime error 'LoadLibrary failed with error 1114' when loading onnxruntime_providers_openvino.dll --- cmake/onnxruntime_providers.cmake | 2 +- samples/GenericInterface/CMakeLists.txt | 13 +++++ samples/GenericInterface/test.cpp | 72 +++++++++++++++++++++++++ tools/ci_build/build.py | 6 ++- 4 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 samples/GenericInterface/CMakeLists.txt create mode 100644 samples/GenericInterface/test.cpp diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 554fafe3d63c7..621526d5086d4 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -136,7 +136,7 @@ if (onnxruntime_USE_VITISAI) include(onnxruntime_providers_vitisai.cmake) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_OPENVINO_INTERFACE)) include(onnxruntime_providers_openvino.cmake) endif() diff --git a/samples/GenericInterface/CMakeLists.txt b/samples/GenericInterface/CMakeLists.txt new file mode 100644 index 0000000000000..29c141adbbab5 --- /dev/null +++ b/samples/GenericInterface/CMakeLists.txt @@ -0,0 +1,13 @@ +# usage: +# cd build/ +# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug +# cmake --build ./ +# NOTE: For Windows, copy onnxruntime.dll and onnxruntime.pdb into the same folder of TestOutTreeEp.exe, otherwise, during runtime, +# it will search the default system path (C:\Windows\System32) for onnxruntime.dll +cmake_minimum_required(VERSION 3.26) +project(GenericOrtEpInterface) +add_executable(GenericOrtEpInterface test.cpp) + +target_include_directories(GenericOrtEpInterface PUBLIC "../../include/onnxruntime") +target_link_libraries(GenericOrtEpInterface PUBLIC "C:/Users/leca/source/onnxruntime3/samples/GenericInterface/build/Debug/onnxruntime.lib") + diff --git a/samples/GenericInterface/test.cpp b/samples/GenericInterface/test.cpp new file mode 100644 index 0000000000000..64cb95e2b8388 --- /dev/null +++ b/samples/GenericInterface/test.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include "core/session/onnxruntime_c_api.h" + +const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); + +inline void THROW_ON_ERROR(OrtStatus* status) { + if (status != nullptr) { + std::cout<<"ErrorMessage:"<GetErrorMessage(status)<<"\n"; + abort(); + } +} + +void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) { + OrtSession* session = nullptr; +#ifdef _WIN32 + THROW_ON_ERROR(g_ort->CreateSession(p_env, L"C:/share/models/relu/Relu.onnx", so, &session)); +#else + THROW_ON_ERROR(g_ort->CreateSession(p_env, "/home/leca/code/onnxruntime/samples/c_test/Relu.onnx", so, &session)); +#endif + + OrtMemoryInfo* memory_info = nullptr; + THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); + float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f}; + const size_t input_len = 4 * sizeof(float); + const int64_t input_shape[] = {4}; + const size_t shape_len = sizeof(input_shape)/sizeof(input_shape[0]); + + OrtValue* input_tensor = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); + + const char* input_names[] = {"x"}; + const char* output_names[] = {"graphOut"}; + OrtValue* output_tensor = nullptr; + THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); + + float* output_tensor_data = nullptr; + THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); + std::cout<<"Result:\n"; + for (size_t i = 0; i < 4; i++) std::cout<>a; + + OrtEnv* p_env = nullptr; + OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR;//OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO; + THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env)); + OrtSessionOptions* so = nullptr; + THROW_ON_ERROR(g_ort->CreateSessionOptions(&so)); + + OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options)); + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options)); + + std::unordered_map ov_options; + ov_options["device_type"] = "CPU"; + ov_options["precision"] = "FP32"; + std::vector keys, values; + for (const auto& entry : ov_options) { + keys.push_back(entry.first.c_str()); + values.push_back(entry.second.c_str()); + } + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size())); + + RunRelu(g_ort, p_env, so); + + return 0; +} diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 52f70a68119a7..3140957a97b61 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -783,6 +783,7 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") parser.add_argument("--enable_tensorrt_interface", action="store_true", help="build ORT shared library and compatible bridge with tensorrt, but not TRT EP nor tests") + parser.add_argument("--enable_openvino_interface", action="store_true", help="build ORT shared library and compatible bridge with OpenVINO, but not OpenVINO EP nor tests") if not is_windows(): parser.add_argument( @@ -1045,6 +1046,7 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), "-Donnxruntime_ENABLE_TRT_INTERFACE=" + ("ON" if args.enable_tensorrt_interface else "OFF"), + "-Donnxruntime_ENABLE_OPENVINO_INTERFACE=" + ("ON" if args.enable_openvino_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1532,7 +1534,7 @@ def generate_build_tree( "-Donnxruntime_USE_FULL_PROTOBUF=ON", ] - if args.enable_tensorrt_interface: + if args.enable_tensorrt_interface or args.enable_openvino_interface: cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"] if args.enable_lazy_tensor: @@ -2657,6 +2659,8 @@ def main(): if args.enable_tensorrt_interface: args.use_tensorrt, args.test = True, False + if args.enable_openvino_interface: + args.use_openvino, args.test = "CPU", False # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). From 6e2f5d5234e172e6827501abd983fa1d55e9157c Mon Sep 17 00:00:00 2001 From: jslhcl Date: Tue, 17 Dec 2024 07:39:21 -0800 Subject: [PATCH 03/25] fix the runtime error and now trt and openvino can run together --- .../core/providers/shared_library/provider_interfaces.h | 8 ++++---- onnxruntime/core/session/provider_bridge_ort.cc | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 962d10d8952d6..1f973c5ceeded 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -178,7 +178,7 @@ struct ProviderHost { virtual std::string demangle(const char* name) = 0; virtual std::string demangle(const std::string& name) = 0; -#ifdef USE_CUDA +//#ifdef USE_CUDA virtual std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateCUDAPinnedAllocator(const char* name) = 0; virtual std::unique_ptr CreateGPUDataTransfer() = 0; @@ -190,7 +190,7 @@ struct ProviderHost { virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; -#endif +//#endif #ifdef USE_MIGRAPHX virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; @@ -1256,9 +1256,9 @@ struct ProviderHost { virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0; #endif -#if defined(USE_CUDA) || defined(USE_ROCM) +//#if defined(USE_CUDA) || defined(USE_ROCM) virtual PhiloxGenerator& PhiloxGenerator__Default() = 0; -#endif +//#endif #ifdef ENABLE_TRAINING_TORCH_INTEROP virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index d7c6dab72fde8..7c1423c4a3dbf 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -258,7 +258,7 @@ struct ProviderHostImpl : ProviderHost { void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); } void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); } -#ifdef USE_CUDA +//#ifdef USE_CUDA std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } @@ -271,7 +271,7 @@ struct ProviderHostImpl : ProviderHost { Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); } void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); } -#endif +//#endif #ifdef USE_MIGRAPHX std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); } @@ -1560,9 +1560,9 @@ struct ProviderHostImpl : ProviderHost { training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); } #endif -#if defined(USE_CUDA) || defined(USE_ROCM) +//#if defined(USE_CUDA) || defined(USE_ROCM) PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); } -#endif +//#endif #ifdef ENABLE_TRAINING_TORCH_INTEROP void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); } From 18950fb5cb64f9cc1776c9c7a887b7cf9de0b163 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Fri, 10 Jan 2025 09:41:25 -0800 Subject: [PATCH 04/25] Experimental changes to remove the IHV SDK dependencies when building onnxruntime.dll onnxruntime_shared.dll --- cmake/CMakeLists.txt | 33 ++++++------ cmake/external/onnx | 2 +- .../external/onnxruntime_external_deps.cmake | 5 +- cmake/onnxruntime.cmake | 6 +-- cmake/onnxruntime_providers.cmake | 13 ++--- cmake/onnxruntime_python.cmake | 8 +-- tools/ci_build/build.py | 50 ++++++++++++------- 7 files changed, 67 insertions(+), 50 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index b332583035890..cff621ceb4169 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -703,7 +703,7 @@ if (WIN32) # structure was padded due to __declspec(align()) list(APPEND ORT_WARNING_FLAGS "/wd4324") # warning C4800: Implicit conversion from 'X' to bool. Possible information loss - if (onnxruntime_USE_OPENVINO) + if (onnxruntime_USE_OPENVINO) #TODO[Karim] applys to all projects... list(APPEND ORT_WARNING_FLAGS "/wd4800") endif() # operator 'operator-name': deprecated between enumerations of different types @@ -831,7 +831,7 @@ set(ONNXRUNTIME_PROVIDER_NAMES cpu) set(ORT_PROVIDER_FLAGS) set(ORT_PROVIDER_CMAKE_FLAGS) -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) enable_language(CUDA) message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") @@ -866,7 +866,7 @@ endif() if (onnxruntime_USE_CUDA) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) - list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) #TODO[Karim] Not used anywhere list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) if (onnxruntime_USE_FLASH_ATTENTION) @@ -929,7 +929,7 @@ if (onnxruntime_USE_JSEP) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES js) endif() -if (onnxruntime_USE_QNN) +if (onnxruntime_USE_QNN) #TODO[Karim] list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn) @@ -1114,14 +1114,15 @@ function(onnxruntime_set_compile_flags target_name) endif() set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - if (onnxruntime_USE_CUDA) + + if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") target_compile_definitions(${target_name} PRIVATE -DDISABLE_CUSPARSE_DEPRECATED) endif() if (MSVC) foreach(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") + #target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") endforeach() foreach(onnxruntime_external_lib IN LISTS onnxruntime_EXTERNAL_LIBRARIES) @@ -1179,7 +1180,7 @@ function(onnxruntime_set_compile_flags target_name) target_compile_options(${target_name} PRIVATE "-Wno-unused-function") endif() endif() - foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) + foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) #TODO[Karim] applies to all projects... target_compile_definitions(${target_name} PRIVATE ${ORT_FLAG}) endforeach() if (HAS_DEPRECATED_COPY) @@ -1192,7 +1193,7 @@ function(onnxruntime_set_compile_flags target_name) if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 13 AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 12) target_compile_options(${target_name} PRIVATE "$<$:-Wno-maybe-uninitialized>") endif() - if (onnxruntime_USE_CUDA) + if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) foreach(FLAG ${ORT_WARNING_FLAGS}) target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options ${FLAG}>") endforeach() @@ -1416,7 +1417,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS) ) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO) #TODO[Karim] applies to both onnxruntime.dll and onnxruntime_shared.dll add_definitions(-DUSE_OPENVINO=1) @@ -1473,7 +1474,7 @@ if (onnxruntime_USE_OPENVINO) endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI) #TODO[Karim] set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_LIST_DIR}") endif() @@ -1495,7 +1496,7 @@ string(APPEND ORT_BUILD_INFO ", cmake cxx flags: ${CMAKE_CXX_FLAGS}") configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_config.h) get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) set(CMAKE_CUDA_STANDARD 17) if(onnxruntime_CUDA_HOME) @@ -1769,17 +1770,17 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_BUILD_APPLE_FRAMEWORK) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime) endif() -if (onnxruntime_BUILD_JAVA) +if (onnxruntime_BUILD_JAVA) #TODO[Karim] *cmake files included below uses onnxruntime_USE_CUDA, USE_QNN etc variables, message(STATUS "Java Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() -if (onnxruntime_BUILD_NODEJS) +if (onnxruntime_BUILD_NODEJS) #TODO[Karim] message(STATUS "Node.js Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_nodejs) endif() -if (onnxruntime_ENABLE_PYTHON) +if (onnxruntime_ENABLE_PYTHON) #TODO[Karim] message(STATUS "Python Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_python) endif() @@ -1789,7 +1790,7 @@ if (onnxruntime_BUILD_OBJC) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_objectivec) endif() -if (onnxruntime_BUILD_UNIT_TESTS) +if (onnxruntime_BUILD_UNIT_TESTS) #TODO[Karim] list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_unittests) endif() @@ -1837,7 +1838,7 @@ if (WIN32 AND NOT GDK_PLATFORM AND NOT CMAKE_CROSSCOMPILING) endif() foreach(target_name ${ONNXRUNTIME_CMAKE_FILES}) - include(${target_name}.cmake) + include(${target_name}.cmake) #TODO[Karim] endforeach() if (UNIX) option(BUILD_PKGCONFIG_FILES "Build and install pkg-config files" ON) diff --git a/cmake/external/onnx b/cmake/external/onnx index b8baa84466864..595228d99e397 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit b8baa8446686496da4cc8fda09f2b6fe65c2a02c +Subproject commit 595228d99e3977ac27cb79d5963adda262af99ad diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 761ce47582ee5..b4ac46ab758a1 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -405,7 +405,7 @@ if ((CPUINFO_SUPPORTED OR onnxruntime_USE_XNNPACK) AND NOT ANDROID) endif() endif() -if(onnxruntime_USE_CUDA) +if(onnxruntime_USE_CUDA) #TODO[Karim] FetchContent_Declare( GSL URL ${DEP_URL_microsoft_gsl} @@ -728,7 +728,8 @@ if (onnxruntime_USE_WEBGPU) endif() set(onnxruntime_LINK_DIRS) -if (onnxruntime_USE_CUDA) + +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) find_package(CUDAToolkit REQUIRED) if(onnxruntime_CUDNN_HOME) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 78edb4179fafd..5398050c7a139 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -37,7 +37,7 @@ function(get_c_cxx_api_headers HEADERS_VAR) endif() # need to add header files for enabled EPs - foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) + foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) #TODO[Karim] # The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory # with onnxruntime_c_api.h . Most other EPs probably also do not work in this way. if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm)) @@ -66,12 +66,12 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") add_custom_command(OUTPUT ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c COMMAND ${Python_EXECUTABLE} "${REPO_ROOT}/tools/ci_build/gen_def.py" --version_file "${ONNXRUNTIME_ROOT}/../VERSION_NUMBER" --src_root "${ONNXRUNTIME_ROOT}" - --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} + --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} #TODO[Karim] --output_source ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c DEPENDS ${SYMBOL_FILES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) +add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) #TODO[Karim] endif() if(WIN32) onnxruntime_add_shared_library(onnxruntime diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 621526d5086d4..9b5b4f4ef3e7d 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -59,10 +59,10 @@ function(add_op_reduction_include_dirs target) endfunction() -if(onnxruntime_USE_VITISAI) +if(onnxruntime_USE_VITISAI) #TODO[Karim] seems like not used?? set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) endif() -if(onnxruntime_USE_CUDA) +if(onnxruntime_USE_CUDA) #TODO[Karim] set(PROVIDERS_CUDA onnxruntime_providers_cuda) endif() if(onnxruntime_USE_COREML) @@ -120,7 +120,7 @@ if(onnxruntime_USE_SNPE) endif() include(onnxruntime_providers_cpu.cmake) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_cuda.cmake) endif() @@ -128,15 +128,15 @@ if (onnxruntime_USE_DNNL) include(onnxruntime_providers_dnnl.cmake) endif() -if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) +if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_tensorrt.cmake) endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_vitisai.cmake) endif() -if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_OPENVINO_INTERFACE)) +if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_openvino.cmake) endif() @@ -161,6 +161,7 @@ if (onnxruntime_USE_JSEP) endif() if (onnxruntime_USE_QNN) + message("Hitting qnn check") include(onnxruntime_providers_qnn.cmake) endif() diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 15a2862cede0c..9dfcab1e0dd1a 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -891,7 +891,7 @@ if (onnxruntime_USE_DNNL) ) endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -901,7 +901,7 @@ if (onnxruntime_USE_VITISAI) ) endif() -if (onnxruntime_USE_TENSORRT) +if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -921,7 +921,7 @@ if (onnxruntime_USE_MIGRAPHX) ) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -944,7 +944,7 @@ if (DEFINED ENV{OPENVINO_MANYLINUX}) ) endif() -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 3140957a97b61..ef116d401e962 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -782,8 +782,7 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels") parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") - parser.add_argument("--enable_tensorrt_interface", action="store_true", help="build ORT shared library and compatible bridge with tensorrt, but not TRT EP nor tests") - parser.add_argument("--enable_openvino_interface", action="store_true", help="build ORT shared library and compatible bridge with OpenVINO, but not OpenVINO EP nor tests") + parser.add_argument("--enable_generic_interface", action="store_true", help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests") if not is_windows(): parser.add_argument( @@ -1045,8 +1044,7 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"), "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), - "-Donnxruntime_ENABLE_TRT_INTERFACE=" + ("ON" if args.enable_tensorrt_interface else "OFF"), - "-Donnxruntime_ENABLE_OPENVINO_INTERFACE=" + ("ON" if args.enable_openvino_interface else "OFF"), + "-Donnxruntime_ENABLE_GENERIC_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1211,6 +1209,8 @@ def generate_build_tree( cmake_args.append("-Donnxruntime_ENABLE_WEBASSEMBLY_SIMD=" + ("ON" if args.enable_wasm_simd else "OFF")) if args.use_migraphx: cmake_args.append("-Donnxruntime_MIGRAPHX_HOME=" + migraphx_home) + + ''' if args.use_cuda: nvcc_threads = number_of_nvcc_threads(args) cmake_args.append("-Donnxruntime_NVCC_THREADS=" + str(nvcc_threads)) @@ -1221,11 +1221,14 @@ def generate_build_tree( f"Add '--disable_types float8' to your command line. See option disable_types." ) cmake_args.append(f"-DCMAKE_CUDA_COMPILER={cuda_home}/bin/nvcc") + ''' + if args.use_rocm: cmake_args.append("-Donnxruntime_ROCM_HOME=" + rocm_home) cmake_args.append("-Donnxruntime_ROCM_VERSION=" + args.rocm_version) - if args.use_tensorrt: - cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) + + #if args.use_tensorrt: + # cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) if args.use_cuda: add_default_definition(cmake_extra_defines, "onnxruntime_USE_CUDA", "ON") @@ -1276,8 +1279,8 @@ def generate_build_tree( if nccl_home and os.path.exists(nccl_home): cmake_args += ["-Donnxruntime_NCCL_HOME=" + nccl_home] - if qnn_home and os.path.exists(qnn_home): - cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] + #if qnn_home and os.path.exists(qnn_home): + # cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] if snpe_root and os.path.exists(snpe_root): cmake_args += ["-DSNPE_ROOT=" + snpe_root] @@ -1369,8 +1372,8 @@ def generate_build_tree( cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id] if args.use_qnn: - if args.qnn_home is None or os.path.exists(args.qnn_home) is False: - raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") + #if args.qnn_home is None or os.path.exists(args.qnn_home) is False: + # raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] if args.use_qnn == "static_lib": @@ -1534,7 +1537,7 @@ def generate_build_tree( "-Donnxruntime_USE_FULL_PROTOBUF=ON", ] - if args.enable_tensorrt_interface or args.enable_openvino_interface: + if args.enable_generic_interface: cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"] if args.enable_lazy_tensor: @@ -2657,10 +2660,12 @@ def main(): # Disable ONNX Runtime's builtin memory checker args.disable_memleak_checker = True - if args.enable_tensorrt_interface: - args.use_tensorrt, args.test = True, False - if args.enable_openvino_interface: - args.use_openvino, args.test = "CPU", False + if args.enable_generic_interface: + args.test = False + args.use_tensorrt = True + args.use_openvino = "CPU" + args.use_vitisai = True + #args.use_qnn = True #defaults should be set based on arm64 vs x64 builds... # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). @@ -2765,7 +2770,9 @@ def main(): source_dir = os.path.normpath(os.path.join(script_dir, "..", "..")) # if using cuda, setup cuda paths and env vars - cuda_home, cudnn_home = setup_cuda_vars(args) + #cuda_home, cudnn_home = setup_cuda_vars(args) + cuda_home = "" + cudnn_home = "" mpi_home = args.mpi_home nccl_home = args.nccl_home @@ -2779,9 +2786,11 @@ def main(): armnn_libs = args.armnn_libs qnn_home = args.qnn_home + qnn_home = "" # if using tensorrt, setup tensorrt paths - tensorrt_home = setup_tensorrt_vars(args) + #tensorrt_home = setup_tensorrt_vars(args) + tensorrt_home = "" # if using migraphx, setup migraphx paths migraphx_home = setup_migraphx_vars(args) @@ -2872,7 +2881,12 @@ def main(): toolset += ",cuda=" + args.cuda_home if args.windows_sdk_version: target_arch += ",version=" + args.windows_sdk_version - cmake_extra_args = ["-A", target_arch, "-T", toolset, "-G", args.cmake_generator] + + make_extra_args = ["-A", target_arch, "-G", args.cmake_generator] + + if (args.use_cuda and (not args.enable_generic_interface)): + cmake_extra_args += ["-T", toolset] + if args.enable_wcos: cmake_extra_defines.append("CMAKE_USER_MAKE_RULES_OVERRIDE=wcos_rules_override.cmake") From 17bceeca0ed88cddff9ea32fa435e3b94c9d2f66 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Fri, 10 Jan 2025 14:52:37 -0800 Subject: [PATCH 05/25] Incorporate code review comments and simply the changes by introducing EP specific Interface flags --- cmake/CMakeLists.txt | 30 ++++++++-------- cmake/adjust_global_compile_flags.cmake | 2 +- .../external/onnxruntime_external_deps.cmake | 4 +-- cmake/onnxruntime_providers.cmake | 17 +++------- cmake/onnxruntime_python.cmake | 8 ++--- tools/ci_build/build.py | 34 +++++++++---------- 6 files changed, 44 insertions(+), 51 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index cff621ceb4169..ecc5692c52330 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -703,7 +703,7 @@ if (WIN32) # structure was padded due to __declspec(align()) list(APPEND ORT_WARNING_FLAGS "/wd4324") # warning C4800: Implicit conversion from 'X' to bool. Possible information loss - if (onnxruntime_USE_OPENVINO) #TODO[Karim] applys to all projects... + if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) list(APPEND ORT_WARNING_FLAGS "/wd4800") endif() # operator 'operator-name': deprecated between enumerations of different types @@ -726,7 +726,7 @@ if (WIN32) set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /INCREMENTAL:NO") endif() endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /LTCG") @@ -831,7 +831,7 @@ set(ONNXRUNTIME_PROVIDER_NAMES cpu) set(ORT_PROVIDER_FLAGS) set(ORT_PROVIDER_CMAKE_FLAGS) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) enable_language(CUDA) message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") @@ -864,7 +864,7 @@ else() set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF) endif() -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) #TODO[Karim] Not used anywhere list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) @@ -888,7 +888,7 @@ if (onnxruntime_USE_CUDA) endif() endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI OR onnxruntime_USE_VITISAI_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_VITISAI=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VITISAI=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai) @@ -898,12 +898,12 @@ if (onnxruntime_USE_DNNL) list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DNNL=1) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_OPENVINO=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_OPENVINO=1) endif() -if (onnxruntime_USE_TENSORRT) +if (onnxruntime_USE_TENSORRT OR onnxruntime_USE_TENSORRT_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_TENSORRT=1) #TODO: remove the following line and change the test code in onnxruntime_shared_lib_test to use the new EP API. list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt) @@ -929,7 +929,7 @@ if (onnxruntime_USE_JSEP) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES js) endif() -if (onnxruntime_USE_QNN) #TODO[Karim] +if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn) @@ -957,7 +957,7 @@ if (onnxruntime_USE_QNN) #TODO[Karim] endif() endif() - if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + if ((NOT onnxruntime_USE_QNN_INTERFACE) AND (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")) file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libQnn*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/Qnn*.dll" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libHtpPrepare.so" @@ -1115,7 +1115,7 @@ function(onnxruntime_set_compile_flags target_name) set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) + if (onnxruntime_USE_CUDA) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") target_compile_definitions(${target_name} PRIVATE -DDISABLE_CUSPARSE_DEPRECATED) @@ -1193,7 +1193,7 @@ function(onnxruntime_set_compile_flags target_name) if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 13 AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 12) target_compile_options(${target_name} PRIVATE "$<$:-Wno-maybe-uninitialized>") endif() - if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) + if (onnxruntime_USE_CUDA) foreach(FLAG ${ORT_WARNING_FLAGS}) target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options ${FLAG}>") endforeach() @@ -1417,7 +1417,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS) ) endif() -if (onnxruntime_USE_OPENVINO) #TODO[Karim] applies to both onnxruntime.dll and onnxruntime_shared.dll +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) add_definitions(-DUSE_OPENVINO=1) @@ -1430,7 +1430,7 @@ if (onnxruntime_USE_OPENVINO) #TODO[Karim] applies to both onnxruntime.dll and o add_definitions(-DOPENVINO_CONFIG_GPU=1) endif() - if (onnxruntime_USE_OPENVINO_CPU) + if (onnxruntime_USE_OPENVINO_CPU OR onnxruntime_USE_OPENVINO_INTERFACE) # OpenVino CPU interface is default built. add_definitions(-DOPENVINO_CONFIG_CPU=1) endif() @@ -1496,7 +1496,7 @@ string(APPEND ORT_BUILD_INFO ", cmake cxx flags: ${CMAKE_CXX_FLAGS}") configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_config.h) get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) set(CMAKE_CUDA_STANDARD 17) if(onnxruntime_CUDA_HOME) @@ -1770,7 +1770,7 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_BUILD_APPLE_FRAMEWORK) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime) endif() -if (onnxruntime_BUILD_JAVA) #TODO[Karim] *cmake files included below uses onnxruntime_USE_CUDA, USE_QNN etc variables, +if (onnxruntime_BUILD_JAVA) message(STATUS "Java Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake index 8b5a744e497a6..9b7bb159d76a4 100644 --- a/cmake/adjust_global_compile_flags.cmake +++ b/cmake/adjust_global_compile_flags.cmake @@ -274,7 +274,7 @@ if (MSVC) string(APPEND CMAKE_C_FLAGS " /arch:AVX512") endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gw /GL") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gw /GL") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL") diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index b4ac46ab758a1..7a7a25b2ab50b 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -405,7 +405,7 @@ if ((CPUINFO_SUPPORTED OR onnxruntime_USE_XNNPACK) AND NOT ANDROID) endif() endif() -if(onnxruntime_USE_CUDA) #TODO[Karim] +if(onnxruntime_USE_CUDA) #TODO[Low] Any changes? FetchContent_Declare( GSL URL ${DEP_URL_microsoft_gsl} @@ -729,7 +729,7 @@ endif() set(onnxruntime_LINK_DIRS) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) find_package(CUDAToolkit REQUIRED) if(onnxruntime_CUDNN_HOME) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 9b5b4f4ef3e7d..f03d120f4302d 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -59,12 +59,6 @@ function(add_op_reduction_include_dirs target) endfunction() -if(onnxruntime_USE_VITISAI) #TODO[Karim] seems like not used?? - set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) -endif() -if(onnxruntime_USE_CUDA) #TODO[Karim] - set(PROVIDERS_CUDA onnxruntime_providers_cuda) -endif() if(onnxruntime_USE_COREML) set(PROVIDERS_COREML onnxruntime_providers_coreml coreml_proto) endif() @@ -120,7 +114,7 @@ if(onnxruntime_USE_SNPE) endif() include(onnxruntime_providers_cpu.cmake) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) include(onnxruntime_providers_cuda.cmake) endif() @@ -128,15 +122,15 @@ if (onnxruntime_USE_DNNL) include(onnxruntime_providers_dnnl.cmake) endif() -if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_TENSORRT) include(onnxruntime_providers_tensorrt.cmake) endif() -if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_VITISAI) include(onnxruntime_providers_vitisai.cmake) endif() -if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_OPENVINO) include(onnxruntime_providers_openvino.cmake) endif() @@ -160,8 +154,7 @@ if (onnxruntime_USE_JSEP) include(onnxruntime_providers_js.cmake) endif() -if (onnxruntime_USE_QNN) - message("Hitting qnn check") +if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) #TODO[Low] Revisit when QNN EP becomes dynamic lib. include(onnxruntime_providers_qnn.cmake) endif() diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 9dfcab1e0dd1a..15a2862cede0c 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -891,7 +891,7 @@ if (onnxruntime_USE_DNNL) ) endif() -if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_VITISAI) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -901,7 +901,7 @@ if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) ) endif() -if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_TENSORRT) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -921,7 +921,7 @@ if (onnxruntime_USE_MIGRAPHX) ) endif() -if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_OPENVINO) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -944,7 +944,7 @@ if (DEFINED ENV{OPENVINO_MANYLINUX}) ) endif() -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index ef116d401e962..03710b795f7d7 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1017,6 +1017,8 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable + enable_qnn_interface = True if((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False + cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), "-Donnxruntime_GENERATE_TEST_REPORTS=ON", @@ -1044,7 +1046,11 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"), "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), - "-Donnxruntime_ENABLE_GENERIC_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), + # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs + "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1209,8 +1215,6 @@ def generate_build_tree( cmake_args.append("-Donnxruntime_ENABLE_WEBASSEMBLY_SIMD=" + ("ON" if args.enable_wasm_simd else "OFF")) if args.use_migraphx: cmake_args.append("-Donnxruntime_MIGRAPHX_HOME=" + migraphx_home) - - ''' if args.use_cuda: nvcc_threads = number_of_nvcc_threads(args) cmake_args.append("-Donnxruntime_NVCC_THREADS=" + str(nvcc_threads)) @@ -1221,14 +1225,11 @@ def generate_build_tree( f"Add '--disable_types float8' to your command line. See option disable_types." ) cmake_args.append(f"-DCMAKE_CUDA_COMPILER={cuda_home}/bin/nvcc") - ''' - if args.use_rocm: cmake_args.append("-Donnxruntime_ROCM_HOME=" + rocm_home) cmake_args.append("-Donnxruntime_ROCM_VERSION=" + args.rocm_version) - - #if args.use_tensorrt: - # cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) + if args.use_tensorrt: + cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) if args.use_cuda: add_default_definition(cmake_extra_defines, "onnxruntime_USE_CUDA", "ON") @@ -1279,8 +1280,8 @@ def generate_build_tree( if nccl_home and os.path.exists(nccl_home): cmake_args += ["-Donnxruntime_NCCL_HOME=" + nccl_home] - #if qnn_home and os.path.exists(qnn_home): - # cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] + if qnn_home and os.path.exists(qnn_home): + cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] if snpe_root and os.path.exists(snpe_root): cmake_args += ["-DSNPE_ROOT=" + snpe_root] @@ -1372,8 +1373,8 @@ def generate_build_tree( cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id] if args.use_qnn: - #if args.qnn_home is None or os.path.exists(args.qnn_home) is False: - # raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") + if args.qnn_home is None or os.path.exists(args.qnn_home) is False: + raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] if args.use_qnn == "static_lib": @@ -2662,10 +2663,6 @@ def main(): if args.enable_generic_interface: args.test = False - args.use_tensorrt = True - args.use_openvino = "CPU" - args.use_vitisai = True - #args.use_qnn = True #defaults should be set based on arm64 vs x64 builds... # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). @@ -2773,6 +2770,8 @@ def main(): #cuda_home, cudnn_home = setup_cuda_vars(args) cuda_home = "" cudnn_home = "" + if args.use_cuda: + cuda_home, cudnn_home = setup_cuda_vars(args) mpi_home = args.mpi_home nccl_home = args.nccl_home @@ -2789,8 +2788,9 @@ def main(): qnn_home = "" # if using tensorrt, setup tensorrt paths - #tensorrt_home = setup_tensorrt_vars(args) tensorrt_home = "" + if args.use_tensorrt: + tensorrt_home = setup_tensorrt_vars(args) # if using migraphx, setup migraphx paths migraphx_home = setup_migraphx_vars(args) From 558df879d5f5c36f22611b1aa507e526633cb6c5 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Fri, 10 Jan 2025 15:04:42 -0800 Subject: [PATCH 06/25] Remove unncessary comments --- cmake/CMakeLists.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index ecc5692c52330..d0eb1e11094ce 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -866,7 +866,7 @@ endif() if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) - list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) #TODO[Karim] Not used anywhere + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) if (onnxruntime_USE_FLASH_ATTENTION) @@ -1180,7 +1180,7 @@ function(onnxruntime_set_compile_flags target_name) target_compile_options(${target_name} PRIVATE "-Wno-unused-function") endif() endif() - foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) #TODO[Karim] applies to all projects... + foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) target_compile_definitions(${target_name} PRIVATE ${ORT_FLAG}) endforeach() if (HAS_DEPRECATED_COPY) @@ -1775,12 +1775,12 @@ if (onnxruntime_BUILD_JAVA) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() -if (onnxruntime_BUILD_NODEJS) #TODO[Karim] +if (onnxruntime_BUILD_NODEJS) message(STATUS "Node.js Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_nodejs) endif() -if (onnxruntime_ENABLE_PYTHON) #TODO[Karim] +if (onnxruntime_ENABLE_PYTHON) message(STATUS "Python Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_python) endif() @@ -1790,7 +1790,7 @@ if (onnxruntime_BUILD_OBJC) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_objectivec) endif() -if (onnxruntime_BUILD_UNIT_TESTS) #TODO[Karim] +if (onnxruntime_BUILD_UNIT_TESTS) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_unittests) endif() @@ -1838,7 +1838,7 @@ if (WIN32 AND NOT GDK_PLATFORM AND NOT CMAKE_CROSSCOMPILING) endif() foreach(target_name ${ONNXRUNTIME_CMAKE_FILES}) - include(${target_name}.cmake) #TODO[Karim] + include(${target_name}.cmake) endforeach() if (UNIX) option(BUILD_PKGCONFIG_FILES "Build and install pkg-config files" ON) From 80c2c620632b4bb74799f36d3db59aef9e19747c Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Sat, 11 Jan 2025 14:13:13 -0800 Subject: [PATCH 07/25] Fix cmake toolset settings --- tools/ci_build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 03710b795f7d7..24a29a97fac1c 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2884,7 +2884,7 @@ def main(): make_extra_args = ["-A", target_arch, "-G", args.cmake_generator] - if (args.use_cuda and (not args.enable_generic_interface)): + if args.use_cuda: cmake_extra_args += ["-T", toolset] if args.enable_wcos: From 13952aff24b61a23a9c584f79414cfa89caf3642 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Mon, 13 Jan 2025 13:14:09 -0800 Subject: [PATCH 08/25] Fix toolset settings in CMake Remove TODO[low] that are no longer applicable --- cmake/CMakeLists.txt | 2 +- cmake/adjust_global_compile_flags.cmake | 2 +- cmake/external/onnxruntime_external_deps.cmake | 2 +- tools/ci_build/build.py | 11 +++-------- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index d0eb1e11094ce..6de5de6b03940 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -726,7 +726,7 @@ if (WIN32) set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /INCREMENTAL:NO") endif() endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /LTCG") diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake index 9b7bb159d76a4..8b5a744e497a6 100644 --- a/cmake/adjust_global_compile_flags.cmake +++ b/cmake/adjust_global_compile_flags.cmake @@ -274,7 +274,7 @@ if (MSVC) string(APPEND CMAKE_C_FLAGS " /arch:AVX512") endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gw /GL") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gw /GL") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL") diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 7a7a25b2ab50b..f77d020a89f88 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -405,7 +405,7 @@ if ((CPUINFO_SUPPORTED OR onnxruntime_USE_XNNPACK) AND NOT ANDROID) endif() endif() -if(onnxruntime_USE_CUDA) #TODO[Low] Any changes? +if(onnxruntime_USE_CUDA) FetchContent_Declare( GSL URL ${DEP_URL_microsoft_gsl} diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 24a29a97fac1c..9707585e1b870 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2875,18 +2875,13 @@ def main(): toolset = "host=" + host_arch + ",version=" + args.msvc_toolset else: toolset = "host=" + host_arch - if args.cuda_version: + if args.use_cuda and args.cuda_version: toolset += ",cuda=" + args.cuda_version - elif args.cuda_home: + elif args.use_cuda and args.cuda_home: toolset += ",cuda=" + args.cuda_home if args.windows_sdk_version: target_arch += ",version=" + args.windows_sdk_version - - make_extra_args = ["-A", target_arch, "-G", args.cmake_generator] - - if args.use_cuda: - cmake_extra_args += ["-T", toolset] - + cmake_extra_args = ["-A", target_arch, "-T", toolset, "-G", args.cmake_generator] if args.enable_wcos: cmake_extra_defines.append("CMAKE_USER_MAKE_RULES_OVERRIDE=wcos_rules_override.cmake") From b971005e0dfac9d27fd3fd1b6403085f9f673ce9 Mon Sep 17 00:00:00 2001 From: jslhcl Date: Tue, 14 Jan 2025 10:51:35 -0800 Subject: [PATCH 09/25] fix comments --- cmake/CMakeLists.txt | 11 ++++++++--- samples/GenericInterface/test.cpp | 19 ++++++++----------- tools/ci_build/build.py | 5 ++++- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 6de5de6b03940..b48f391dd39b2 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -259,6 +259,11 @@ option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF) option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF) option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF) +option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF) +option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF) +option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) +option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) + # ENABLE_TRAINING includes all training functionality # The following 2 entry points # 1. ORTModule @@ -1114,7 +1119,7 @@ function(onnxruntime_set_compile_flags target_name) endif() set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - + if (onnxruntime_USE_CUDA) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") @@ -1417,7 +1422,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS) ) endif() -if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) add_definitions(-DUSE_OPENVINO=1) @@ -1770,7 +1775,7 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_BUILD_APPLE_FRAMEWORK) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime) endif() -if (onnxruntime_BUILD_JAVA) +if (onnxruntime_BUILD_JAVA) message(STATUS "Java Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() diff --git a/samples/GenericInterface/test.cpp b/samples/GenericInterface/test.cpp index 64cb95e2b8388..d29d00013cb73 100644 --- a/samples/GenericInterface/test.cpp +++ b/samples/GenericInterface/test.cpp @@ -14,31 +14,28 @@ inline void THROW_ON_ERROR(OrtStatus* status) { void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) { OrtSession* session = nullptr; -#ifdef _WIN32 - THROW_ON_ERROR(g_ort->CreateSession(p_env, L"C:/share/models/relu/Relu.onnx", so, &session)); -#else - THROW_ON_ERROR(g_ort->CreateSession(p_env, "/home/leca/code/onnxruntime/samples/c_test/Relu.onnx", so, &session)); -#endif + // Copy relu.onnx model from winml\test\collateral\models to the same path as the executable + THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session)); OrtMemoryInfo* memory_info = nullptr; THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); - float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f}; - const size_t input_len = 4 * sizeof(float); - const int64_t input_shape[] = {4}; + float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f}; + const size_t input_len = 5 * sizeof(float); + const int64_t input_shape[] = {5}; const size_t shape_len = sizeof(input_shape)/sizeof(input_shape[0]); OrtValue* input_tensor = nullptr; THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); - const char* input_names[] = {"x"}; - const char* output_names[] = {"graphOut"}; + const char* input_names[] = {"X"}; + const char* output_names[] = {"Y"}; OrtValue* output_tensor = nullptr; THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); float* output_tensor_data = nullptr; THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); std::cout<<"Result:\n"; - for (size_t i = 0; i < 4; i++) std::cout< Date: Tue, 14 Jan 2025 14:07:35 -0800 Subject: [PATCH 10/25] Fixing lintrunner warnings ( lintrunner -a) --- .../shared_library/provider_interfaces.h | 8 +- .../core/session/provider_bridge_ort.cc | 8 +- samples/GenericInterface/test.cpp | 90 +++++++++---------- tools/ci_build/build.py | 24 +++-- 4 files changed, 70 insertions(+), 60 deletions(-) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 1f973c5ceeded..ba46e82f83258 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -178,7 +178,7 @@ struct ProviderHost { virtual std::string demangle(const char* name) = 0; virtual std::string demangle(const std::string& name) = 0; -//#ifdef USE_CUDA + // #ifdef USE_CUDA virtual std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateCUDAPinnedAllocator(const char* name) = 0; virtual std::unique_ptr CreateGPUDataTransfer() = 0; @@ -190,7 +190,7 @@ struct ProviderHost { virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; -//#endif + // #endif #ifdef USE_MIGRAPHX virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; @@ -1256,9 +1256,9 @@ struct ProviderHost { virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0; #endif -//#if defined(USE_CUDA) || defined(USE_ROCM) + // #if defined(USE_CUDA) || defined(USE_ROCM) virtual PhiloxGenerator& PhiloxGenerator__Default() = 0; -//#endif + // #endif #ifdef ENABLE_TRAINING_TORCH_INTEROP virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 7c1423c4a3dbf..ac5ec2bb37862 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -258,7 +258,7 @@ struct ProviderHostImpl : ProviderHost { void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); } void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); } -//#ifdef USE_CUDA + // #ifdef USE_CUDA std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } @@ -271,7 +271,7 @@ struct ProviderHostImpl : ProviderHost { Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); } void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); } -//#endif + // #endif #ifdef USE_MIGRAPHX std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); } @@ -1560,9 +1560,9 @@ struct ProviderHostImpl : ProviderHost { training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); } #endif -//#if defined(USE_CUDA) || defined(USE_ROCM) + // #if defined(USE_CUDA) || defined(USE_ROCM) PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); } -//#endif + // #endif #ifdef ENABLE_TRAINING_TORCH_INTEROP void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); } diff --git a/samples/GenericInterface/test.cpp b/samples/GenericInterface/test.cpp index d29d00013cb73..ddfc30a1fedf6 100644 --- a/samples/GenericInterface/test.cpp +++ b/samples/GenericInterface/test.cpp @@ -6,64 +6,64 @@ const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); inline void THROW_ON_ERROR(OrtStatus* status) { - if (status != nullptr) { - std::cout<<"ErrorMessage:"<GetErrorMessage(status)<<"\n"; - abort(); - } + if (status != nullptr) { + std::cout << "ErrorMessage:" << g_ort->GetErrorMessage(status) << "\n"; + abort(); + } } void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) { - OrtSession* session = nullptr; - // Copy relu.onnx model from winml\test\collateral\models to the same path as the executable - THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session)); + OrtSession* session = nullptr; + // Copy relu.onnx model from winml\test\collateral\models to the same path as the executable + THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session)); - OrtMemoryInfo* memory_info = nullptr; - THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); - float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f}; - const size_t input_len = 5 * sizeof(float); - const int64_t input_shape[] = {5}; - const size_t shape_len = sizeof(input_shape)/sizeof(input_shape[0]); + OrtMemoryInfo* memory_info = nullptr; + THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); + float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f}; + const size_t input_len = 5 * sizeof(float); + const int64_t input_shape[] = {5}; + const size_t shape_len = sizeof(input_shape) / sizeof(input_shape[0]); - OrtValue* input_tensor = nullptr; - THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); + OrtValue* input_tensor = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); - const char* input_names[] = {"X"}; - const char* output_names[] = {"Y"}; - OrtValue* output_tensor = nullptr; - THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); + const char* input_names[] = {"X"}; + const char* output_names[] = {"Y"}; + OrtValue* output_tensor = nullptr; + THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); - float* output_tensor_data = nullptr; - THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); - std::cout<<"Result:\n"; - for (size_t i = 0; i < 5; i++) std::cout<GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); + std::cout << "Result:\n"; + for (size_t i = 0; i < 5; i++) std::cout << output_tensor_data[i] << " \n"; } int main() { - int a; - std::cout<<"prepare to attach:"; - std::cin>>a; + int a; + std::cout << "prepare to attach:"; + std::cin >> a; - OrtEnv* p_env = nullptr; - OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR;//OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO; - THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env)); - OrtSessionOptions* so = nullptr; - THROW_ON_ERROR(g_ort->CreateSessionOptions(&so)); + OrtEnv* p_env = nullptr; + OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR; // OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO; + THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env)); + OrtSessionOptions* so = nullptr; + THROW_ON_ERROR(g_ort->CreateSessionOptions(&so)); - OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr; - THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options)); - THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options)); + OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options)); + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options)); - std::unordered_map ov_options; - ov_options["device_type"] = "CPU"; - ov_options["precision"] = "FP32"; - std::vector keys, values; - for (const auto& entry : ov_options) { - keys.push_back(entry.first.c_str()); - values.push_back(entry.second.c_str()); - } - THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size())); + std::unordered_map ov_options; + ov_options["device_type"] = "CPU"; + ov_options["precision"] = "FP32"; + std::vector keys, values; + for (const auto& entry : ov_options) { + keys.push_back(entry.first.c_str()); + values.push_back(entry.second.c_str()); + } + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size())); - RunRelu(g_ort, p_env, so); + RunRelu(g_ort, p_env, so); - return 0; + return 0; } diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 988a0f2c993f8..a3e876985d514 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -782,7 +782,11 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels") parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") - parser.add_argument("--enable_generic_interface", action="store_true", help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests") + parser.add_argument( + "--enable_generic_interface", + action="store_true", + help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests", + ) if not is_windows(): parser.add_argument( @@ -1017,7 +1021,9 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable - enable_qnn_interface = True if((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False + enable_qnn_interface = ( + True if ((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False + ) cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), @@ -1047,10 +1053,14 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs - "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), - "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), - "-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), - "-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_TENSORRT_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_OPENVINO_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_VITISAI_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_QNN_INTERFACE=" + + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -2770,7 +2780,7 @@ def main(): source_dir = os.path.normpath(os.path.join(script_dir, "..", "..")) # if using cuda, setup cuda paths and env vars - #cuda_home, cudnn_home = setup_cuda_vars(args) + # cuda_home, cudnn_home = setup_cuda_vars(args) cuda_home = "" cudnn_home = "" if args.use_cuda: From 18984d83b6048e6bdcd74de6b614239aec27bb16 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Tue, 14 Jan 2025 16:26:55 -0800 Subject: [PATCH 11/25] Few more issues --- cmake/CMakeLists.txt | 2 +- cmake/onnxruntime.cmake | 6 +++--- onnxruntime/core/session/provider_bridge_ort.cc | 2 -- tools/ci_build/build.py | 4 +--- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index b48f391dd39b2..c31e9f8680f99 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1479,7 +1479,7 @@ if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) endif() -if (onnxruntime_USE_VITISAI) #TODO[Karim] +if (onnxruntime_USE_VITISAI) set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_LIST_DIR}") endif() diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 5398050c7a139..78edb4179fafd 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -37,7 +37,7 @@ function(get_c_cxx_api_headers HEADERS_VAR) endif() # need to add header files for enabled EPs - foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) #TODO[Karim] + foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) # The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory # with onnxruntime_c_api.h . Most other EPs probably also do not work in this way. if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm)) @@ -66,12 +66,12 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") add_custom_command(OUTPUT ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c COMMAND ${Python_EXECUTABLE} "${REPO_ROOT}/tools/ci_build/gen_def.py" --version_file "${ONNXRUNTIME_ROOT}/../VERSION_NUMBER" --src_root "${ONNXRUNTIME_ROOT}" - --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} #TODO[Karim] + --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} --output_source ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c DEPENDS ${SYMBOL_FILES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) #TODO[Karim] +add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) endif() if(WIN32) onnxruntime_add_shared_library(onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index ac5ec2bb37862..be047147983df 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -258,7 +258,6 @@ struct ProviderHostImpl : ProviderHost { void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); } void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); } - // #ifdef USE_CUDA std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } @@ -271,7 +270,6 @@ struct ProviderHostImpl : ProviderHost { Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); } void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); } - // #endif #ifdef USE_MIGRAPHX std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); } diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index a3e876985d514..543079dbad253 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1021,9 +1021,7 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable - enable_qnn_interface = ( - True if ((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False - ) + enable_qnn_interface = bool((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), From 720280fd37c55262fe4f5792178f6870d014f243 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Wed, 15 Jan 2025 15:23:20 -0800 Subject: [PATCH 12/25] Fix Pipeline issues with QNN --- tools/ci_build/build.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 543079dbad253..b0b8d7afe8c5a 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1381,6 +1381,7 @@ def generate_build_tree( cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id] if args.use_qnn: + if args.qnn_home is None or os.path.exists(args.qnn_home) is False: raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] @@ -2795,8 +2796,9 @@ def main(): armnn_home = args.armnn_home armnn_libs = args.armnn_libs - qnn_home = args.qnn_home qnn_home = "" + if args.use_qnn: + qnn_home = args.qnn_home # if using tensorrt, setup tensorrt paths tensorrt_home = "" From 73c175cce983dc72eb1a64736aaec41120f1c651 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Sat, 18 Jan 2025 10:25:01 -0800 Subject: [PATCH 13/25] Incorporate code review comments --- cmake/CMakeLists.txt | 4 ++-- cmake/external/onnxruntime_external_deps.cmake | 1 - cmake/onnxruntime_providers.cmake | 6 ++++++ .../core/providers/shared_library/provider_interfaces.h | 1 - tools/ci_build/build.py | 2 ++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c31e9f8680f99..8650cc53d93ef 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -260,6 +260,7 @@ option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for thre option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF) option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF) +option(onnxruntime_USE_CUDA_INTERFACE "Build ONNXRuntime shared lib which is compatible with Cuda EP interface" OFF) option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF) option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) @@ -1119,7 +1120,6 @@ function(onnxruntime_set_compile_flags target_name) endif() set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - if (onnxruntime_USE_CUDA) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") @@ -1127,7 +1127,7 @@ function(onnxruntime_set_compile_flags target_name) endif() if (MSVC) foreach(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - #target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") + target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") endforeach() foreach(onnxruntime_external_lib IN LISTS onnxruntime_EXTERNAL_LIBRARIES) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index f77d020a89f88..761ce47582ee5 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -728,7 +728,6 @@ if (onnxruntime_USE_WEBGPU) endif() set(onnxruntime_LINK_DIRS) - if (onnxruntime_USE_CUDA) find_package(CUDAToolkit REQUIRED) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index f03d120f4302d..6ddfec0a42d41 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -59,6 +59,12 @@ function(add_op_reduction_include_dirs target) endfunction() +if(onnxruntime_USE_VITISAI) + set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) +endif() +if(onnxruntime_USE_CUDA) + set(PROVIDERS_CUDA onnxruntime_providers_cuda) +endif() if(onnxruntime_USE_COREML) set(PROVIDERS_COREML onnxruntime_providers_coreml coreml_proto) endif() diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index ba46e82f83258..03f5e69720940 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -200,7 +200,6 @@ struct ProviderHost { #ifdef USE_ROCM virtual std::unique_ptr CreateROCMAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateROCMPinnedAllocator(const char* name) = 0; - virtual std::unique_ptr CreateGPUDataTransfer() = 0; virtual void rocm__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) = 0; virtual void rocm__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) = 0; diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index b0b8d7afe8c5a..ebde2799b84a4 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1053,6 +1053,8 @@ def generate_build_tree( # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_CUDA_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), "-Donnxruntime_USE_VITISAI_INTERFACE=" From f8b75b2b5f92383affb382d95e5c6a72b1bac9da Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Tue, 21 Jan 2025 10:43:59 -0800 Subject: [PATCH 14/25] fix function definition --- onnxruntime/core/session/provider_bridge_ort.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index be047147983df..29fc86226f27c 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -260,7 +260,6 @@ struct ProviderHostImpl : ProviderHost { std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } - std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } void cuda__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); } void cuda__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); } @@ -289,6 +288,8 @@ struct ProviderHostImpl : ProviderHost { Status RocmCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_ROCM().RocmCall_false(retCode, exprString, libName, successCode, msg, file, line); } void RocmCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_ROCM().RocmCall_true(retCode, exprString, libName, successCode, msg, file, line); } +#else + std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } #endif std::string GetEnvironmentVar(const std::string& var_name) override { return Env::Default().GetEnvironmentVar(var_name); } From 8210cd0d9e3c9a0dd699d7d5ba1debd289de3b36 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Tue, 21 Jan 2025 11:16:30 -0800 Subject: [PATCH 15/25] fix python syntax --- tools/ci_build/build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index ebde2799b84a4..b8c88ce940b25 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1383,7 +1383,6 @@ def generate_build_tree( cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id] if args.use_qnn: - if args.qnn_home is None or os.path.exists(args.qnn_home) is False: raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] From 21155369fdf9aad03ae28966d50a9b8604e12294 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Tue, 21 Jan 2025 14:13:50 -0800 Subject: [PATCH 16/25] Deleted test files --- samples/GenericInterface/CMakeLists.txt | 13 ----- samples/GenericInterface/test.cpp | 69 ------------------------- 2 files changed, 82 deletions(-) delete mode 100644 samples/GenericInterface/CMakeLists.txt delete mode 100644 samples/GenericInterface/test.cpp diff --git a/samples/GenericInterface/CMakeLists.txt b/samples/GenericInterface/CMakeLists.txt deleted file mode 100644 index 29c141adbbab5..0000000000000 --- a/samples/GenericInterface/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# usage: -# cd build/ -# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -# cmake --build ./ -# NOTE: For Windows, copy onnxruntime.dll and onnxruntime.pdb into the same folder of TestOutTreeEp.exe, otherwise, during runtime, -# it will search the default system path (C:\Windows\System32) for onnxruntime.dll -cmake_minimum_required(VERSION 3.26) -project(GenericOrtEpInterface) -add_executable(GenericOrtEpInterface test.cpp) - -target_include_directories(GenericOrtEpInterface PUBLIC "../../include/onnxruntime") -target_link_libraries(GenericOrtEpInterface PUBLIC "C:/Users/leca/source/onnxruntime3/samples/GenericInterface/build/Debug/onnxruntime.lib") - diff --git a/samples/GenericInterface/test.cpp b/samples/GenericInterface/test.cpp deleted file mode 100644 index ddfc30a1fedf6..0000000000000 --- a/samples/GenericInterface/test.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include -#include -#include "core/session/onnxruntime_c_api.h" - -const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); - -inline void THROW_ON_ERROR(OrtStatus* status) { - if (status != nullptr) { - std::cout << "ErrorMessage:" << g_ort->GetErrorMessage(status) << "\n"; - abort(); - } -} - -void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) { - OrtSession* session = nullptr; - // Copy relu.onnx model from winml\test\collateral\models to the same path as the executable - THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session)); - - OrtMemoryInfo* memory_info = nullptr; - THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); - float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f}; - const size_t input_len = 5 * sizeof(float); - const int64_t input_shape[] = {5}; - const size_t shape_len = sizeof(input_shape) / sizeof(input_shape[0]); - - OrtValue* input_tensor = nullptr; - THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); - - const char* input_names[] = {"X"}; - const char* output_names[] = {"Y"}; - OrtValue* output_tensor = nullptr; - THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); - - float* output_tensor_data = nullptr; - THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); - std::cout << "Result:\n"; - for (size_t i = 0; i < 5; i++) std::cout << output_tensor_data[i] << " \n"; -} - -int main() { - int a; - std::cout << "prepare to attach:"; - std::cin >> a; - - OrtEnv* p_env = nullptr; - OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR; // OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO; - THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env)); - OrtSessionOptions* so = nullptr; - THROW_ON_ERROR(g_ort->CreateSessionOptions(&so)); - - OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr; - THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options)); - THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options)); - - std::unordered_map ov_options; - ov_options["device_type"] = "CPU"; - ov_options["precision"] = "FP32"; - std::vector keys, values; - for (const auto& entry : ov_options) { - keys.push_back(entry.first.c_str()); - values.push_back(entry.second.c_str()); - } - THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size())); - - RunRelu(g_ort, p_env, so); - - return 0; -} From 955bbae8c0bc355e808ca884c553453d9364e7f2 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Tue, 21 Jan 2025 15:52:28 -0800 Subject: [PATCH 17/25] Fix build interface logic --- tools/ci_build/build.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index b8c88ce940b25..c595047538f2f 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1021,7 +1021,16 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable - enable_qnn_interface = bool((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) + enable_qnn_interface = False + if args.enable_generic_interface: + host_arch = platform.machine() + if host_arch == "AMD64": + if args.arm64 or args.arm or args.arm64ec: + enable_qnn_interface = True + elif host_arch == "ARM64": + enable_qnn_interface = True + else: + raise BuildError("unknown python arch") cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), From 7cfa2e2529eb035d7575843fb9c6773867f283c5 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Wed, 22 Jan 2025 14:26:04 -0800 Subject: [PATCH 18/25] fix code comments after rebase --- cmake/onnxruntime_providers.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 6ddfec0a42d41..78e18688f6690 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -160,7 +160,7 @@ if (onnxruntime_USE_JSEP) include(onnxruntime_providers_js.cmake) endif() -if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) #TODO[Low] Revisit when QNN EP becomes dynamic lib. +if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) include(onnxruntime_providers_qnn.cmake) endif() From c8ae7e653b6627e497397faf89ee7e053ee9871b Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Wed, 22 Jan 2025 14:48:30 -0800 Subject: [PATCH 19/25] Revert "accidental cmake/external/onnx changes" --- cmake/external/onnx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/external/onnx b/cmake/external/onnx index 595228d99e397..b8baa84466864 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit 595228d99e3977ac27cb79d5963adda262af99ad +Subproject commit b8baa8446686496da4cc8fda09f2b6fe65c2a02c From bb115e536d93a7931881e1842cf89a4f27b965cf Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Wed, 22 Jan 2025 15:34:32 -0800 Subject: [PATCH 20/25] Avoid building qnn-ep when generic interface used --- cmake/onnxruntime_providers.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 78e18688f6690..67fa48b28278d 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -160,7 +160,7 @@ if (onnxruntime_USE_JSEP) include(onnxruntime_providers_js.cmake) endif() -if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) +if (onnxruntime_USE_QNN) include(onnxruntime_providers_qnn.cmake) endif() From 1a4f5824872abddbca1060b775cb750b733a5129 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Wed, 22 Jan 2025 15:59:30 -0800 Subject: [PATCH 21/25] Add explicit build error when qnn-ep is build as static library with generic ORT interface --- tools/ci_build/build.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index c595047538f2f..6c4f4d98049b5 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1400,6 +1400,8 @@ def generate_build_tree( cmake_args += ["-Donnxruntime_BUILD_QNN_EP_STATIC_LIB=ON"] if args.android and args.use_qnn != "static_lib": raise BuildError("Only support Android + QNN builds with QNN EP built as a static library.") + if args.use_qnn == "static_lib" and args.enable_generic_interface: + raise BuildError("Generic ORT interface only supported with QNN EP built as a shared library.") if args.use_coreml: cmake_args += ["-Donnxruntime_USE_COREML=ON"] From 9857e342adda73af202a1299107c6956c174f7ea Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Thu, 23 Jan 2025 10:44:34 -0800 Subject: [PATCH 22/25] Fix codeQL warnings --- .../core/providers/shared_library/provider_interfaces.h | 4 ---- onnxruntime/core/session/provider_bridge_ort.cc | 2 -- 2 files changed, 6 deletions(-) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 03f5e69720940..a1bb86598ebc0 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -178,7 +178,6 @@ struct ProviderHost { virtual std::string demangle(const char* name) = 0; virtual std::string demangle(const std::string& name) = 0; - // #ifdef USE_CUDA virtual std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateCUDAPinnedAllocator(const char* name) = 0; virtual std::unique_ptr CreateGPUDataTransfer() = 0; @@ -190,7 +189,6 @@ struct ProviderHost { virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; - // #endif #ifdef USE_MIGRAPHX virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; @@ -1255,9 +1253,7 @@ struct ProviderHost { virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0; #endif - // #if defined(USE_CUDA) || defined(USE_ROCM) virtual PhiloxGenerator& PhiloxGenerator__Default() = 0; - // #endif #ifdef ENABLE_TRAINING_TORCH_INTEROP virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 29fc86226f27c..3a694ac6f8e5e 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1559,9 +1559,7 @@ struct ProviderHostImpl : ProviderHost { training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); } #endif - // #if defined(USE_CUDA) || defined(USE_ROCM) PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); } - // #endif #ifdef ENABLE_TRAINING_TORCH_INTEROP void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); } From 9f52af7d54a61d090609db3e9f720f385ed7eea9 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Thu, 23 Jan 2025 23:23:40 -0800 Subject: [PATCH 23/25] Remove commented code and enable interface for all eps by default --- tools/ci_build/build.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 6c4f4d98049b5..a0207fc02f956 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1021,17 +1021,6 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable - enable_qnn_interface = False - if args.enable_generic_interface: - host_arch = platform.machine() - if host_arch == "AMD64": - if args.arm64 or args.arm or args.arm64ec: - enable_qnn_interface = True - elif host_arch == "ARM64": - enable_qnn_interface = True - else: - raise BuildError("unknown python arch") - cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), "-Donnxruntime_GENERATE_TEST_REPORTS=ON", @@ -1061,15 +1050,15 @@ def generate_build_tree( + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs "-Donnxruntime_USE_TENSORRT_INTERFACE=" - + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + + ("ON" if args.enable_generic_interface else "OFF"), "-Donnxruntime_USE_CUDA_INTERFACE=" - + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + + ("ON" if args.enable_generic_interface else "OFF"), "-Donnxruntime_USE_OPENVINO_INTERFACE=" - + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + + ("ON" if args.enable_generic_interface else "OFF"), "-Donnxruntime_USE_VITISAI_INTERFACE=" - + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + + ("ON" if args.enable_generic_interface else "OFF"), "-Donnxruntime_USE_QNN_INTERFACE=" - + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"), + + ("ON" if args.enable_generic_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -2791,7 +2780,6 @@ def main(): source_dir = os.path.normpath(os.path.join(script_dir, "..", "..")) # if using cuda, setup cuda paths and env vars - # cuda_home, cudnn_home = setup_cuda_vars(args) cuda_home = "" cudnn_home = "" if args.use_cuda: From 6761a9302c427e53ef9786291c45e01cc128e2ff Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Thu, 23 Jan 2025 23:38:52 -0800 Subject: [PATCH 24/25] Update tools/ci_build/build.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- tools/ci_build/build.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index a0207fc02f956..cc733f859fe0b 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1049,16 +1049,11 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs - "-Donnxruntime_USE_TENSORRT_INTERFACE=" - + ("ON" if args.enable_generic_interface else "OFF"), - "-Donnxruntime_USE_CUDA_INTERFACE=" - + ("ON" if args.enable_generic_interface else "OFF"), - "-Donnxruntime_USE_OPENVINO_INTERFACE=" - + ("ON" if args.enable_generic_interface else "OFF"), - "-Donnxruntime_USE_VITISAI_INTERFACE=" - + ("ON" if args.enable_generic_interface else "OFF"), - "-Donnxruntime_USE_QNN_INTERFACE=" - + ("ON" if args.enable_generic_interface else "OFF"), + "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), + "-Donnxruntime_USE_CUDA_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), + "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), + "-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), + "-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), From 453a17a0ad2938ce36fdd946653911c23c293c10 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Mon, 27 Jan 2025 14:43:59 -0800 Subject: [PATCH 25/25] Add ci pipline build step for windows --- .../azure-pipelines/win-ci-pipeline.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml index 94c2d35a563b6..d96f1cb68c388 100644 --- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml @@ -177,6 +177,25 @@ stages: WITH_CACHE: false MachinePool: 'onnxruntime-Win-CPU-2022' +- stage: x64_release_ep_generic_interface + dependsOn: [] + jobs: + - template: templates/jobs/win-ci-vs-2022-job.yml + parameters: + BuildConfig: 'RelWithDebInfo' + buildArch: x64 + additionalBuildFlags: --enable_generic_interface + msbuildPlatform: x64 + isX86: false + job_name_suffix: x64_release_ep_generic_interface + RunOnnxRuntimeTests: false # --enable_generic_interface does not build tests + EnablePython: false + isTraining: false + ORT_EP_NAME: CPU + GenerateDocumentation: false + WITH_CACHE: false + MachinePool: 'onnxruntime-Win-CPU-2022' + - stage: x86_release dependsOn: [] jobs: