|
14 | 14 | #include "nccl_ofi_log.h" |
15 | 15 | #include "nccl_ofi_param.h" |
16 | 16 |
|
17 | | -#define DECLARE_CUDA_FUNCTION(function) static PFN_##function pfn_##function = NULL |
18 | | -#define RESOLVE_CUDA_FUNCTION(function) \ |
19 | | - do { \ |
/*
 * Declare a file-local pointer named pfn_<function> for a CUDA driver entry
 * point. The pointer's type is the versioned typedef
 * PFN_<function>_v<version>, and it is initialised to NULL.
 *
 * Invoke with a trailing semicolon, e.g. DECLARE_CUDA_FUNCTION(cuMemFree, 3020);
 */
#define DECLARE_CUDA_FUNCTION(function, version) \
	static PFN_##function##_v##version pfn_##function = NULL
| 18 | + |
| 19 | +#if CUDART_VERSION >= 13000 |
/*
 * Resolve the CUDA driver entry point `function` at ABI version `version`
 * and store it in pfn_<function> (variant for CUDA runtime >= 13.0).
 *
 * Per the CUDA Runtime API documentation, cudaGetDriverEntryPointByVersion()
 * still returns cudaSuccess when the symbol is missing or the driver is too
 * old; those conditions are reported only through the query-result
 * out-parameter (with the function pointer set to NULL). Both the return
 * code and the query result are therefore checked.
 *
 * On any failure a warning is logged and pfn_<function> is left NULL. The
 * macro does not return from, or otherwise abort, the calling function.
 *
 * The expansion has no trailing semicolon, so it behaves as one statement:
 * invoke it as RESOLVE_CUDA_FUNCTION(name, version);
 */
#define RESOLVE_CUDA_FUNCTION(function, version)                                                    \
	do {                                                                                        \
		/* Pre-set so the value is defined even if the call never writes it. */             \
		enum cudaDriverEntryPointQueryResult result = cudaDriverEntryPointSymbolNotFound;   \
		cudaError_t err = cudaGetDriverEntryPointByVersion(#function,                       \
								   (void **)&pfn_##function,        \
								   version,                         \
								   cudaEnableDefault,               \
								   &result);                        \
		if (err != cudaSuccess) {                                                           \
			/* The query itself failed; `result` carries no information here. */        \
			NCCL_OFI_WARN("Failed to resolve CUDA function %s: %s",                     \
				      #function, cudaGetErrorString(err));                          \
			pfn_##function = NULL;                                                      \
		} else if (result != cudaDriverEntryPointSuccess || pfn_##function == NULL) {       \
			switch (result) {                                                           \
			case cudaDriverEntryPointSymbolNotFound:                                    \
				NCCL_OFI_WARN("Failed to resolve CUDA function %s", #function);     \
				break;                                                              \
			case cudaDriverEntryPointVersionNotSufficent:                               \
				NCCL_OFI_WARN("Insufficient driver to use CUDA function %s",        \
					      #function);                                           \
				break;                                                              \
			case cudaDriverEntryPointSuccess:                                           \
			default:                                                                    \
				NCCL_OFI_WARN("Unexpected cudaDriverEntryPointQueryResult value %d", \
					      (int)result);                                         \
				break;                                                              \
			}                                                                           \
			/* Never leave a pointer behind that callers might mistake as usable. */    \
			pfn_##function = NULL;                                                      \
		}                                                                                   \
	} while (0)
| 39 | +#else |
| 40 | +#define RESOLVE_CUDA_FUNCTION(function, version) do { \ |
20 | 41 | enum cudaDriverEntryPointQueryResult result; \ |
21 | 42 | cudaError_t err = \ |
22 | 43 | cudaGetDriverEntryPoint(#function, (void **)&pfn_##function, cudaEnableDefault, &result); \ |
|
35 | 56 | } \ |
36 | 57 | } \ |
37 | 58 | } while (0); |
| 59 | +#endif |
38 | 60 |
|
39 | | -DECLARE_CUDA_FUNCTION(cuCtxGetDevice); |
40 | | -DECLARE_CUDA_FUNCTION(cuDeviceGetAttribute); |
41 | | -DECLARE_CUDA_FUNCTION(cuMemGetHandleForAddressRange); |
42 | | -DECLARE_CUDA_FUNCTION(cuMemGetAddressRange); |
43 | | -DECLARE_CUDA_FUNCTION(cuMemAlloc); |
44 | | -DECLARE_CUDA_FUNCTION(cuMemFree); |
45 | | -DECLARE_CUDA_FUNCTION(cuMemcpy); |
| 61 | +DECLARE_CUDA_FUNCTION(cuCtxGetDevice, 2000); |
| 62 | +DECLARE_CUDA_FUNCTION(cuDeviceGetAttribute, 2000); |
| 63 | +DECLARE_CUDA_FUNCTION(cuMemGetHandleForAddressRange, 11070); |
| 64 | +DECLARE_CUDA_FUNCTION(cuMemGetAddressRange, 3020); |
| 65 | +DECLARE_CUDA_FUNCTION(cuMemAlloc, 3020); |
| 66 | +DECLARE_CUDA_FUNCTION(cuMemFree, 3020); |
| 67 | +DECLARE_CUDA_FUNCTION(cuMemcpy, 4000); |
46 | 68 |
|
47 | 69 | int nccl_net_ofi_cuda_init(void) |
48 | 70 | { |
@@ -70,13 +92,13 @@ int nccl_net_ofi_cuda_init(void) |
70 | 92 | driverVersion, |
71 | 93 | runtimeVersion); |
72 | 94 |
|
73 | | - RESOLVE_CUDA_FUNCTION(cuCtxGetDevice); |
74 | | - RESOLVE_CUDA_FUNCTION(cuDeviceGetAttribute); |
75 | | - RESOLVE_CUDA_FUNCTION(cuMemGetHandleForAddressRange); |
76 | | - RESOLVE_CUDA_FUNCTION(cuMemGetAddressRange); |
77 | | - RESOLVE_CUDA_FUNCTION(cuMemAlloc); |
78 | | - RESOLVE_CUDA_FUNCTION(cuMemFree); |
79 | | - RESOLVE_CUDA_FUNCTION(cuMemcpy); |
| 95 | + RESOLVE_CUDA_FUNCTION(cuCtxGetDevice, 2000); |
| 96 | + RESOLVE_CUDA_FUNCTION(cuDeviceGetAttribute, 2000); |
| 97 | + RESOLVE_CUDA_FUNCTION(cuMemGetHandleForAddressRange, 11070); |
| 98 | + RESOLVE_CUDA_FUNCTION(cuMemGetAddressRange, 3020); |
| 99 | + RESOLVE_CUDA_FUNCTION(cuMemAlloc, 3020); |
| 100 | + RESOLVE_CUDA_FUNCTION(cuMemFree, 3020); |
| 101 | + RESOLVE_CUDA_FUNCTION(cuMemcpy, 4000); |
80 | 102 |
|
81 | 103 | if (HAVE_CUDA_GDRFLUSH_SUPPORT && nccl_net_ofi_cuda_have_gdr_support_attr() && ofi_nccl_cuda_flush_enable()) { |
82 | 104 | NCCL_OFI_WARN("CUDA flush enabled"); |
@@ -129,7 +151,7 @@ int nccl_net_ofi_cuda_mem_alloc(void **ptr, size_t size) |
129 | 151 |
|
130 | 152 | int nccl_net_ofi_cuda_mem_free(void *ptr) |
131 | 153 | { |
132 | | - CUresult ret = pfn_cuMemFree((CUdeviceptr)ptr); |
| 154 | + CUresult ret = pfn_cuMemFree((CUdeviceptr)ptr); |
133 | 155 | return ret == CUDA_SUCCESS ? 0 : -EINVAL; |
134 | 156 | } |
135 | 157 |
|
|
0 commit comments