Skip to content

Commit b4c901b

Browse files
committed
Update NVTX trace dimensions as a run-time config to avoid
unnecessary re-compilation. Signed-off-by: Kyle Kim <[email protected]>
1 parent 2fb60af commit b4c901b

File tree

4 files changed

+83
-64
lines changed

4 files changed

+83
-64
lines changed

include/nccl_ofi_param.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,4 +379,20 @@ OFI_NCCL_PARAM(PROGRESS_MODEL, progress_model, "PROGRESS_MODEL", PROGRESS_MODEL
379379
*/
380380
OFI_NCCL_PARAM(std::string, platform, "PLATFORM", "");
381381

382+
/*
383+
* NVTX Tracing dimension. Valid options are PER_COMM and PER_DEV,
384+
* with the default set to PER_COMM.
385+
*
386+
* PER_COMM: Collect NVTX traces in a "per-communicator" view, which associates parent
387+
* send/recv events with constituent events (segments, controls).
388+
*
389+
* PER_DEV: Collect NVTX traces in a "per-device" view, which associates sub-events with
390+
* an EFA device, showing activity on each device.
391+
*
392+
* This environment variable has no effect unless the plugin is built
393+
* with NVTX support (--with-nvtx, i.e. HAVE_NVTX_TRACING is enabled).
394+
*/
395+
OFI_NCCL_PARAM_VALUE_SET(NVTX_TRACE_DIMENSION, (PER_COMM)(PER_DEV))
396+
OFI_NCCL_PARAM(NVTX_TRACE_DIMENSION, nvtx_trace_dimension, "NVTX_TRACE_DIMENSION", NVTX_TRACE_DIMENSION::PER_COMM)
397+
382398
#endif // End NCCL_OFI_PARAM_H_

include/tracing_impl/nvtx.h

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,15 @@ static inline void nvtx_end(nvtxRangeId_t id) {
5151
}
5252

5353
#define NCCL_OFI_TRACE_SEND_NVTX(dev, size, comm, msg_seq_num, request, nccl_req) do { \
54-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
54+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
5555
nvtxDomainHandle_t handle = ((nccl_net_ofi_rdma_send_comm_t*)comm) \
5656
->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
5757
get_send_data(request)->trace_id = nvtx_start_domain(true, handle, "Send", 0xeb9234); \
5858
} \
5959
} while (0)
6060

6161
#define NCCL_OFI_TRACE_SEND_END_NVTX(request) do { \
62-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
62+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
6363
nvtxDomainHandle_t handle = ((nccl_net_ofi_rdma_send_comm_t*)(request->comm)) \
6464
->nvtx_domain[request->msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
6565
nvtx_end_domain(handle, get_send_data(request)->trace_id); \
@@ -68,98 +68,98 @@ static inline void nvtx_end(nvtxRangeId_t id) {
6868

6969
#define NCCL_OFI_TRACE_EAGER_SEND_START_NVTX(dev, rail_id, size, comm, msg_seq_num, request) do { \
7070
nvtxDomainHandle_t handle; \
71-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
71+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
7272
handle = ((nccl_net_ofi_rdma_send_comm_t*)comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
7373
get_send_data(request)->seg_trace_id[rail_id] = nvtx_start_domain(true, handle, "Send_eager", 0x0000FF); \
7474
} \
75-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
75+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
7676
handle = (static_cast<nccl_net_ofi_rdma_ep_t *>(comm->ep)->rdma_endpoint_get_device())->nvtx_domain[rail_id]; \
7777
get_send_data(request)->seg_trace_id[rail_id] = nvtx_start_domain(true, handle, "Send_eager", 0x0000FF); \
7878
} \
7979
} while (0)
8080

8181
#define NCCL_OFI_TRACE_EAGER_SEND_COMPLETE_NVTX(dev, rail_id, comm, msg_seq_num, request) do { \
8282
nvtxDomainHandle_t handle; \
83-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
83+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
8484
handle = ((nccl_net_ofi_rdma_send_comm_t*)comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
8585
nvtx_end_domain(handle, get_send_data(request)->seg_trace_id[rail_id]); \
8686
} \
87-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
87+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
8888
handle = (static_cast<nccl_net_ofi_rdma_ep_t *>(comm->ep)->rdma_endpoint_get_device())->nvtx_domain[rail_id]; \
8989
nvtx_end_domain(handle, get_send_data(request)->seg_trace_id[rail_id]); \
9090
} \
9191
} while(0)
9292

9393
#define NCCL_OFI_TRACE_SEND_CTRL_RECV_NVTX(dev, rail_id, comm, msg_seq_num) do { \
9494
nvtxDomainHandle_t handle; \
95-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
95+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
9696
handle = ((nccl_net_ofi_rdma_send_comm_t*)comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
9797
nvtx_mark_domain(handle, "Send_ctrl_recv", 0x00ffff); \
9898
} \
99-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
99+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
100100
handle = static_cast<nccl_net_ofi_rdma_ep_t *>(s_comm->base.base.ep)->rdma_endpoint_get_device()->nvtx_domain[rail_id]; \
101101
nvtx_mark_domain(handle, "Send_ctrl_recv", 0x00ffff); \
102102
} \
103103
} while (0)
104104

105105
#define NCCL_OFI_TRACE_WRITE_CTRL_START_NVTX(dev, rail_id, comm, req, msg_seq_num) do { \
106106
nvtxDomainHandle_t handle; \
107-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
107+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
108108
handle = ((nccl_net_ofi_rdma_recv_comm_t *)comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
109109
get_recv_data(req)->write_ctrl_trace_id = nvtx_start_domain(true, handle, "Write_ctrl_start", 0x00ffff); \
110110
} \
111-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
111+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
112112
handle = static_cast<nccl_net_ofi_rdma_ep_t *>(comm->ep)->rdma_endpoint_get_device()->nvtx_domain[rail_id]; \
113113
get_recv_data(req)->write_ctrl_trace_id = nvtx_start_domain(true, handle, "Write_ctrl_start", 0x00ffff); \
114114
} \
115115
} while (0)
116116

117117
#define NCCL_OFI_TRACE_WRITE_CTRL_END_NVTX(dev, rail_id, comm, req, msg_seq_num) do { \
118118
nvtxDomainHandle_t handle; \
119-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
119+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
120120
handle = ((nccl_net_ofi_rdma_recv_comm_t *)comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
121121
nvtx_end_domain(handle, get_recv_data(req)->write_ctrl_trace_id); \
122122
} \
123-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
123+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
124124
handle = static_cast<nccl_net_ofi_rdma_ep_t *>(comm->ep)->rdma_endpoint_get_device()->nvtx_domain[rail_id]; \
125125
nvtx_end_domain(handle, get_recv_data(req)->write_ctrl_trace_id);\
126126
} \
127127
} while (0)
128128

129129
#define NCCL_OFI_TRACE_SEND_WRITE_SEG_START_NVTX(dev, rail_id, size, comm, msg_seq_num, request) do { \
130130
nvtxDomainHandle_t handle; \
131-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
131+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
132132
handle = ((nccl_net_ofi_rdma_send_comm_t*)comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
133133
get_send_data(request)->seg_trace_id[rail_id] = nvtx_start_domain(true, handle, "Send_write_seg", 0xff0000); \
134134
} \
135-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
135+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
136136
handle = static_cast<nccl_net_ofi_rdma_ep_t *>(comm->ep)->rdma_endpoint_get_device()->nvtx_domain[rail_id]; \
137137
get_send_data(request)->seg_trace_id[rail_id] = nvtx_start_domain(true, handle, "Send_write_seg", 0xff0000); \
138138
} \
139139
} while(0)
140140

141141
#define NCCL_OFI_TRACE_SEND_WRITE_SEG_COMPLETE_NVTX(dev, rail_id, comm, msg_seq_num, request) do { \
142142
nvtxDomainHandle_t handle; \
143-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
143+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
144144
handle = ((nccl_net_ofi_rdma_send_comm_t*)comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
145145
nvtx_end_domain(handle, get_send_data(request)->seg_trace_id[rail_id]); \
146146
} \
147-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
147+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
148148
handle = static_cast<nccl_net_ofi_rdma_ep_t *>(comm->ep)->rdma_endpoint_get_device()->nvtx_domain[rail_id]; \
149149
nvtx_end_domain(handle, get_send_data(request)->seg_trace_id[rail_id]); \
150150
} \
151151
} while(0)
152152

153153
#define NCCL_OFI_TRACE_RECV_NVTX(dev, r_comm, size, request, nccl_req) do { \
154-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
154+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
155155
nvtxDomainHandle_t handle = ((nccl_net_ofi_rdma_recv_comm_t *)request->comm) \
156156
->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
157157
get_recv_data(request)->trace_id = nvtx_start_domain(true, handle, "Recv", 0x34EB37); \
158158
} \
159159
} while(0)
160160

161161
#define NCCL_OFI_TRACE_RECV_END_NVTX(request) do { \
162-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
162+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
163163
nvtxDomainHandle_t handle = ((nccl_net_ofi_rdma_recv_comm_t *)request->comm) \
164164
->nvtx_domain[request->msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
165165
nvtx_end_domain(handle, get_recv_data(request)->trace_id); \
@@ -168,23 +168,23 @@ static inline void nvtx_end(nvtxRangeId_t id) {
168168

169169
#define NCCL_OFI_TRACE_RECV_SEGMENT_COMPLETE_NVTX(dev, rail_id, size, request, msg_seq_num) do { \
170170
nvtxDomainHandle_t handle; \
171-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
171+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
172172
handle = ((nccl_net_ofi_rdma_recv_comm_t *)request->comm)->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
173173
nvtx_mark_domain(handle, "Recv_segment_complete", 0xff0000); \
174174
} \
175-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
175+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
176176
handle = static_cast<nccl_net_ofi_rdma_ep_t *>(request->comm->ep)->rdma_endpoint_get_device()->nvtx_domain[rail_id]; \
177177
nvtx_mark_domain(handle, "Recv_segment_complete", 0xff0000); \
178178
} \
179179
} while(0)
180180

181181
#define NCCL_OFI_TRACE_EAGER_RECV_NVTX(dev, rail_id, comm, msg_seq_num) do { \
182182
nvtxDomainHandle_t handle; \
183-
if (NCCL_OFI_NVTX_TRACE_PER_COMM) { \
183+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) { \
184184
handle = comm->nvtx_domain[msg_seq_num % NCCL_OFI_N_NVTX_DOMAIN_PER_COMM]; \
185185
nvtx_mark_domain(handle, "Eager_recv", 0x0000FF); \
186186
} \
187-
if (NCCL_OFI_NVTX_TRACE_PER_DEV) { \
187+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) { \
188188
handle = static_cast<nccl_net_ofi_rdma_ep_t *>(r_comm->base.base.ep)->rdma_endpoint_get_device()->nvtx_domain[rail_id]; \
189189
nvtx_mark_domain(handle, "Eager_recv", 0x0000FF); \
190190
} \

m4/check_pkg_nvtx.m4

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,22 +38,6 @@ AC_DEFUN([CHECK_PKG_NVTX], [
3838
[nvtx_tracing=1],
3939
[nvtx_tracing=0])
4040
41-
AC_ARG_ENABLE([nvtx_trace_per_comm], AS_HELP_STRING([--enable-nvtx-trace-per-comm],
42-
[Collect NVTX traces in a "per-communicator" view, which associates parent
43-
send/recv events with constituent events (segments, controls).]))
44-
AS_IF([test "${enableval}" = "yes" -a "x${nvtx_tracing}" = "x1"], [nvtx_trace_per_comm=${nvtx_tracing}], [nvtx_trace_per_comm=0])
45-
46-
AC_ARG_ENABLE([nvtx_trace_per_dev], AS_HELP_STRING([--enable-nvtx-trace-per-dev],
47-
[Collect NVTX traces in a "per-device" view, which associates sub-events with
48-
an EFA device, showing activity on each device.]))
49-
AS_IF([test "${enableval}" = "yes" -a "x${nvtx_tracing}" = "x1"], [nvtx_trace_per_dev=${nvtx_tracing}], [nvtx_trace_per_dev=0])
50-
51-
AC_DEFINE_UNQUOTED([NCCL_OFI_NVTX_TRACE_PER_COMM], [$nvtx_trace_per_comm], [Defined to 1 if NVTX traces are collected per-communicator])
52-
AC_DEFINE_UNQUOTED([NCCL_OFI_NVTX_TRACE_PER_DEV], [$nvtx_trace_per_dev], [Defined to 1 if NVTX traces are collected per-device])
53-
54-
AS_IF([test "${nvtx_trace_per_comm}" -ne 0 -a "${nvtx_trace_per_dev}" -ne 0],
55-
AC_MSG_ERROR([Error: setting both nvtx_trace_per_comm and nvtx_trace_per_dev is currently not supported]))
56-
5741
AS_UNSET([check_pkg_found])
5842
AS_UNSET([check_pkg_define])
5943
AS_UNSET([check_pkg_CPPFLAGS_save])

src/nccl_ofi_rdma.cpp

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3427,9 +3427,11 @@ static int recv_comm_destroy(nccl_net_ofi_rdma_recv_comm_t *r_comm)
34273427
}
34283428

34293429
/* Destroy domain */
3430-
#if HAVE_NVTX_TRACING && NCCL_OFI_NVTX_TRACE_PER_COMM
3431-
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i) {
3432-
nvtxDomainDestroy(r_comm->nvtx_domain[i]);
3430+
#if HAVE_NVTX_TRACING
3431+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) {
3432+
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i) {
3433+
nvtxDomainDestroy(r_comm->nvtx_domain[i]);
3434+
}
34333435
}
34343436
#endif
34353437

@@ -3665,9 +3667,11 @@ static int send_comm_destroy(nccl_net_ofi_rdma_send_comm_t *s_comm)
36653667
device->comm_idpool.free_id(s_comm->local_comm_id);
36663668

36673669
/* Destroy domain */
3668-
#if HAVE_NVTX_TRACING && NCCL_OFI_NVTX_TRACE_PER_COMM
3669-
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i) {
3670-
nvtxDomainDestroy(s_comm->nvtx_domain[i]);
3670+
#if HAVE_NVTX_TRACING
3671+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) {
3672+
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i) {
3673+
nvtxDomainDestroy(s_comm->nvtx_domain[i]);
3674+
}
36713675
}
36723676
#endif
36733677

@@ -4485,13 +4489,15 @@ static nccl_net_ofi_rdma_recv_comm_t *prepare_recv_comm(nccl_net_ofi_rdma_domain
44854489
return NULL;
44864490
}
44874491

4488-
#if HAVE_NVTX_TRACING && NCCL_OFI_NVTX_TRACE_PER_COMM
4489-
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i)
4490-
{
4491-
/* Create nvtx domain */
4492-
char name[64];
4493-
snprintf(name, 64, "aws-ofi-nccl r_comm %p_%d", r_comm, i);
4494-
r_comm->nvtx_domain[i] = nvtxDomainCreateA(name);
4492+
#if HAVE_NVTX_TRACING
4493+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) {
4494+
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i)
4495+
{
4496+
/* Create nvtx domain */
4497+
char name[64];
4498+
snprintf(name, 64, "aws-ofi-nccl r_comm %p_%d", r_comm, i);
4499+
r_comm->nvtx_domain[i] = nvtxDomainCreateA(name);
4500+
}
44954501
}
44964502
#endif
44974503

@@ -6123,13 +6129,15 @@ int nccl_net_ofi_rdma_ep_t::create_send_comm(nccl_net_ofi_rdma_send_comm_t **s_c
61236129
goto error;
61246130
}
61256131

6126-
#if HAVE_NVTX_TRACING && NCCL_OFI_NVTX_TRACE_PER_COMM
6127-
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i)
6128-
{
6129-
/* Create nvtx domain */
6130-
char name[64];
6131-
snprintf(name, 64, "aws-ofi-nccl s_comm %p_%d", ret_s_comm, i);
6132-
ret_s_comm->nvtx_domain[i] = nvtxDomainCreateA(name);
6132+
#if HAVE_NVTX_TRACING
6133+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_COMM) {
6134+
for (int i = 0; i < NCCL_OFI_N_NVTX_DOMAIN_PER_COMM; ++i)
6135+
{
6136+
/* Create nvtx domain */
6137+
char name[64];
6138+
snprintf(name, 64, "aws-ofi-nccl s_comm %p_%d", ret_s_comm, i);
6139+
ret_s_comm->nvtx_domain[i] = nvtxDomainCreateA(name);
6140+
}
61336141
}
61346142
#endif
61356143
*s_comm = ret_s_comm;
@@ -6819,6 +6827,15 @@ int nccl_net_ofi_rdma_device_t::cleanup_resources()
68196827
}
68206828
}
68216829

6830+
/* Destroy domain */
6831+
#if HAVE_NVTX_TRACING
6832+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) {
6833+
for (int i = 0; i < this->num_rails; ++i) {
6834+
nvtxDomainDestroy(this->nvtx_domain[i]);
6835+
}
6836+
}
6837+
#endif
6838+
68226839
this->release_device_ofi_resources();
68236840

68246841
assert(ret == 0);
@@ -6930,12 +6947,14 @@ nccl_net_ofi_rdma_device_t::nccl_net_ofi_rdma_device_t(nccl_net_ofi_plugin_t *pl
69306947
}
69316948

69326949
/* NVTX domain */
6933-
#if HAVE_NVTX_TRACING && NCCL_OFI_NVTX_TRACE_PER_DEV
6934-
for (int i = 0; i < this->num_rails; ++i) {
6935-
/* Create nvtx domain */
6936-
char name[64];
6937-
snprintf(name, 64, "aws-ofi-nccl dev %d_%d", dev_id, i);
6938-
this->nvtx_domain[i] = nvtxDomainCreateA(name);
6950+
#if HAVE_NVTX_TRACING
6951+
if (ofi_nccl_nvtx_trace_dimension() == NVTX_TRACE_DIMENSION::PER_DEV) {
6952+
for (int i = 0; i < this->num_rails; ++i) {
6953+
/* Create nvtx domain */
6954+
char buf[64];
6955+
snprintf(buf, 64, "aws-ofi-nccl dev %d_%d", dev_id, i);
6956+
this->nvtx_domain[i] = nvtxDomainCreateA(buf);
6957+
}
69396958
}
69406959
#endif
69416960
}

0 commit comments

Comments
 (0)