PR #22532: [ROCm] Use device allocate for collective memory allocation
Imported from GitHub PR #22532

This extends #22102 and fixes //xla/stream_executor/gpu:gpu_executor_test_gpu_amd_any
Copybara import of the project:

--
5be78ab by Harsha HS <[email protected]>:

[ROCm] Use device allocate for collective memory allocation

This extends #22102 and fixes
//xla/stream_executor/gpu:gpu_executor_test_gpu_amd_any

Merging this change closes #22532

COPYBARA_INTEGRATE_REVIEW=#22532 from ROCm:ci_fix_collective_alloc_20250210 5be78ab
PiperOrigin-RevId: 725996277
hsharsha authored and Google-ML-Automation committed Feb 12, 2025
1 parent 92f4589 commit 105e240
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions xla/stream_executor/rocm/rocm_executor.cc

```diff
@@ -732,6 +732,9 @@ absl::StatusOr<ModuleHandle> RocmExecutor::LoadModuleFromHsaco(
 }
 
 DeviceMemoryBase RocmExecutor::Allocate(uint64_t size, int64_t memory_space) {
+  if (memory_space == static_cast<int64_t>(MemoryType::kCollective)) {
+    return DeviceMemoryBase(DeviceAllocate(rocm_context_, size), size);
+  }
   if (memory_space ==
       static_cast<int64_t>(stream_executor::MemoryType::kHost)) {
     auto result = HostAllocate(rocm_context_, size);
```
