
Commit a76b473

zhaojuanmao authored and facebook-github-bot committed
clang format reducer and logger files (pytorch#53148)
Summary:
Pull Request resolved: pytorch#53148

clang format reducer and logger files

ghstack-source-id: 123453983

Test Plan: unit test

Reviewed By: SciPioneer

Differential Revision: D26764509

fbshipit-source-id: 711efcfd77420f912861cfd20c69e3af5086f4b9
1 parent d032287 commit a76b473

File tree: 4 files changed, +63 / -71 lines changed


torch/csrc/distributed/c10d/init.cpp

Lines changed: 20 additions & 24 deletions
@@ -1,9 +1,9 @@
 #include <torch/csrc/python_headers.h>
 
 #include <c10/util/intrusive_ptr.h>
-#include <c10d/Utils.hpp>
 #include <c10d/FileStore.hpp>
 #include <c10d/TCPStore.hpp>
+#include <c10d/Utils.hpp>
 #ifndef _WIN32
 #include <c10d/HashStore.hpp>
 #include <c10d/ProcessGroupRoundRobin.hpp>
@@ -301,10 +301,11 @@ An enum-like class for built-in communication hooks: ``ALLREDUCE`` and ``FP16_CO
 "save_thread_local_state",
 &::c10d::Reducer::save_thread_local_state,
 py::call_guard<py::gil_scoped_release>())
-.def("_set_ddp_runtime_logging_sample_rate",
-&::c10d::Reducer::set_ddp_runtime_logging_sample_rate,
-py::arg("sample_rate"),
-py::call_guard<py::gil_scoped_release>());
+.def(
+"_set_ddp_runtime_logging_sample_rate",
+&::c10d::Reducer::set_ddp_runtime_logging_sample_rate,
+py::arg("sample_rate"),
+py::call_guard<py::gil_scoped_release>());
 
 shared_ptr_class_<::c10d::Logger>(module, "Logger")
 .def(
@@ -323,25 +324,25 @@ An enum-like class for built-in communication hooks: ``ALLREDUCE`` and ``FP16_CO
 "set_runtime_stats_and_log",
 &::c10d::Logger::set_runtime_stats_and_log,
 py::call_guard<py::gil_scoped_release>())
-.def(
+.def(
 "_get_ddp_logging_data",
 &::c10d::Logger::get_ddp_logging_data,
 py::call_guard<py::gil_scoped_release>())
-.def(
-"_set_comm_hook_name",
-&::c10d::Logger::set_comm_hook,
-py::arg("comm_hook"),
-py::call_guard<py::gil_scoped_release>());
+.def(
+"_set_comm_hook_name",
+&::c10d::Logger::set_comm_hook,
+py::arg("comm_hook"),
+py::call_guard<py::gil_scoped_release>());
 
 py::enum_<::c10d::DistributedDebugLevel>(module, "_DistributedDebugLevel", R"(
 An enum whose values correspond to different debug settings of the
 torch.distributed package. Currently supporting settings are OFF, INFO,
 and DETAIL, which can be set via the TORCH_DISTRIBUTED_DEBUG environment
 variable.
 )")
-.value("OFF", ::c10d::DistributedDebugLevel::OFF)
-.value("INFO", ::c10d::DistributedDebugLevel::INFO)
-.value("DETAIL", ::c10d::DistributedDebugLevel::DETAIL);
+.value("OFF", ::c10d::DistributedDebugLevel::OFF)
+.value("INFO", ::c10d::DistributedDebugLevel::INFO)
+.value("DETAIL", ::c10d::DistributedDebugLevel::DETAIL);
 
 module.def(
 "_get_debug_mode",
@@ -1283,7 +1284,7 @@ that adds a prefix to each key inserted to the store.
 Note that ``fut.done()`` returns only whether the operation has been enqueued on the GPU.
 )");
 
-py::class_<c10::DDPLoggingData>(module, "DDPLoggingData")
+py::class_<c10::DDPLoggingData>(module, "DDPLoggingData")
 .def(py::init<>())
 .def_readwrite("world_size", &c10::DDPLoggingData::world_size)
 .def_readwrite("rank", &c10::DDPLoggingData::rank)
@@ -1344,18 +1345,13 @@ py::class_<c10::DDPLoggingData>(module, "DDPLoggingData")
 .def_readwrite(
 "avg_backward_compute_comm_overlap_time",
 &c10::DDPLoggingData::avg_backward_compute_comm_overlap_time)
+.def_readwrite("comm_hook", &c10::DDPLoggingData::comm_hook)
 .def_readwrite(
-"comm_hook",
-&c10::DDPLoggingData::comm_hook)
-.def_readwrite(
-"forward_compute_time",
-&c10::DDPLoggingData::forward_compute_time)
+"forward_compute_time", &c10::DDPLoggingData::forward_compute_time)
 .def_readwrite(
-"backward_compute_time",
-&c10::DDPLoggingData::backward_compute_time)
+"backward_compute_time", &c10::DDPLoggingData::backward_compute_time)
 .def_readwrite(
-"backward_comm_time",
-&c10::DDPLoggingData::backward_comm_time)
+"backward_comm_time", &c10::DDPLoggingData::backward_comm_time)
 .def_readwrite(
 "backward_compute_comm_overlap_time",
 &c10::DDPLoggingData::backward_compute_comm_overlap_time)
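
For context only: a minimal, self-contained pybind11 sketch of the .def(...) call style that clang-format enforces in the hunks above. The demo module name and the Reducer stub (with its sample_rate_ member and its default of 100) are invented for illustration; this is not PyTorch's actual c10d binding code.

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Stand-in class; the real ::c10d::Reducer has a far richer interface.
struct Reducer {
  void set_ddp_runtime_logging_sample_rate(int sample_rate) {
    sample_rate_ = sample_rate;
  }
  int sample_rate_ = 100; // illustrative default only
};

PYBIND11_MODULE(demo, m) {
  py::class_<Reducer>(m, "Reducer")
      .def(py::init<>())
      .def(
          "_set_ddp_runtime_logging_sample_rate",
          &Reducer::set_ddp_runtime_logging_sample_rate,
          py::arg("sample_rate"),
          // Release the GIL while the C++ body runs, as in the binding above.
          py::call_guard<py::gil_scoped_release>());
}

From Python this would be called as demo.Reducer()._set_ddp_runtime_logging_sample_rate(sample_rate=50); the py::call_guard releases the GIL for the duration of the call.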

torch/lib/c10d/logger.cpp

Lines changed: 19 additions & 26 deletions
@@ -1,5 +1,5 @@
-#include <c10d/logger.hpp>
 #include <c10d/Utils.hpp>
+#include <c10d/logger.hpp>
 #include <fmt/format.h>
 
 namespace c10d {
@@ -14,25 +14,21 @@ const int kMilliSecondToNanosSecond = 1000000;
 
 } // anonymous namespace
 
-std::ostream& operator<<(
-std::ostream& output,
-const Logger& logger
-) {
+std::ostream& operator<<(std::ostream& output, const Logger& logger) {
 auto& ddp_logging_data = logger.ddp_logging_data_;
 
 std::string loggerInfo = fmt::format(
-"[Rank {} / {}] Training {} unused_parameter_size={} \n "
-"Avg forward compute time: {} \n Avg backward compute time: {} \n"
-"Avg backward comm. time: {} \n Avg backward comm/comp overlap time: {}",
-ddp_logging_data->rank,
-ddp_logging_data->world_size,
-ddp_logging_data->module_name,
-ddp_logging_data->unused_parameter_size,
-ddp_logging_data->avg_forward_compute_time,
-ddp_logging_data->avg_backward_compute_time,
-ddp_logging_data->avg_backward_comm_time,
-ddp_logging_data->avg_backward_compute_comm_overlap_time
-);
+"[Rank {} / {}] Training {} unused_parameter_size={} \n "
+"Avg forward compute time: {} \n Avg backward compute time: {} \n"
+"Avg backward comm. time: {} \n Avg backward comm/comp overlap time: {}",
+ddp_logging_data->rank,
+ddp_logging_data->world_size,
+ddp_logging_data->module_name,
+ddp_logging_data->unused_parameter_size,
+ddp_logging_data->avg_forward_compute_time,
+ddp_logging_data->avg_backward_compute_time,
+ddp_logging_data->avg_backward_comm_time,
+ddp_logging_data->avg_backward_compute_comm_overlap_time);
 
 if (ddp_logging_data->comm_hook != "") {
 loggerInfo +=
@@ -56,7 +52,8 @@ void Logger::set_env_variables() {
 ddp_logging_data_->gloo_device_transport = parse_env("GLOO_DEVICE_TRANSPORT");
 ddp_logging_data_->nccl_socket_ifname = parse_env("NCCL_SOCKET_IFNAME");
 ddp_logging_data_->nccl_blocking_wait = parse_env("NCCL_BLOCKING_WAIT");
-ddp_logging_data_->nccl_async_error_handling = parse_env("NCCL_ASYNC_ERROR_HANDLING");
+ddp_logging_data_->nccl_async_error_handling =
+parse_env("NCCL_ASYNC_ERROR_HANDLING");
 ddp_logging_data_->nccl_debug = parse_env("NCCL_DEBUG");
 ddp_logging_data_->nccl_nthreads = parse_env("NCCL_NTHREADS");
 ddp_logging_data_->nccl_ib_timeout = parse_env("NCCL_IB_TIMEOUT");
@@ -124,9 +121,7 @@ void Logger::set_construction_data_and_log(
 
 if (parseDistDebugLevel() != DistributedDebugLevel::OFF) {
 std::string initInfo = fmt::format(
-"[Rank {}]: DDP Initialized with: \n",
-ddp_logging_data_->rank
-);
+"[Rank {}]: DDP Initialized with: \n", ddp_logging_data_->rank);
 LOG(INFO) << initInfo << *ddp_logging_data_;
 }
 
@@ -149,8 +144,7 @@ void Logger::calculate_avg_cpu_time(
 return;
 }
 time_duration = cpu_end_time - cpu_start_time;
-avg_time = (time_duration +
-avg_time * (num_iterations_stats_recorded_ - 1)) /
+avg_time = (time_duration + avg_time * (num_iterations_stats_recorded_ - 1)) /
 num_iterations_stats_recorded_;
 }
 
@@ -172,8 +166,7 @@ void Logger::calculate_avg_gpu_time(
 return;
 }
 time_duration = int64_t(milliseconds * kMilliSecondToNanosSecond);
-avg_time = (time_duration +
-avg_time * (num_iterations_stats_recorded_ - 1)) /
+avg_time = (time_duration + avg_time * (num_iterations_stats_recorded_ - 1)) /
 num_iterations_stats_recorded_;
 }
 #endif
@@ -267,7 +260,7 @@ void Logger::set_runtime_stats_and_log() {
 
 calculate_avg_cpu_time(
 ddp_logging_data_->avg_backward_compute_time,
-ddp_logging_data_->backward_compute_time,
+ddp_logging_data_->backward_compute_time,
 reducer_->cpu_timer_.backward_compute_start_time,
 reducer_->cpu_timer_.backward_compute_end_time);
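
The two hunks touching calculate_avg_cpu_time and calculate_avg_gpu_time above only re-wrap the running-average update; the arithmetic is unchanged. A standalone C++ sketch of that update, with illustrative names rather than the actual Logger members:

#include <cstdint>
#include <iostream>

// Fold the latest duration into a running average without storing all samples:
// avg_new = (latest + avg_old * (n - 1)) / n
void update_running_avg(int64_t& avg_time, int64_t time_duration, int64_t num_recorded) {
  avg_time = (time_duration + avg_time * (num_recorded - 1)) / num_recorded;
}

int main() {
  int64_t avg = 0;
  const int64_t samples[] = {100, 200, 300};
  for (int64_t n = 1; n <= 3; ++n) {
    update_running_avg(avg, samples[n - 1], n);
  }
  std::cout << avg << "\n"; // prints 200, the mean of the three samples
  return 0;
}

Note that with integer operands this division truncates.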

torch/lib/c10d/logger.hpp

Lines changed: 1 addition & 4 deletions
@@ -20,10 +20,7 @@ class Logger {
 
 // Stream insertion operator for logging data to stream under
 // TORCH_DISTRIBUTED_DEBUG.
-friend std::ostream& operator<<(
-std::ostream& output,
-const Logger& logger
-);
+friend std::ostream& operator<<(std::ostream& output, const Logger& logger);
 
 // Set environment variables.
 void set_env_variables();

torch/lib/c10d/reducer.cpp

Lines changed: 23 additions & 17 deletions
@@ -59,14 +59,14 @@ Reducer::Reducer(
 {
 std::set<int> unique_devices;
 for (const auto& v : replicas_[0]) {
-auto device_idx = int(v.device().index());
-if (unique_devices.find(device_idx) == unique_devices.end()) {
-unique_devices.insert(device_idx);
-if (unique_devices.size() > 1) {
-is_multi_device_module_ = true;
-break;
-}
+auto device_idx = int(v.device().index());
+if (unique_devices.find(device_idx) == unique_devices.end()) {
+unique_devices.insert(device_idx);
+if (unique_devices.size() > 1) {
+is_multi_device_module_ = true;
+break;
 }
+}
 }
 }
 
@@ -423,8 +423,8 @@ void Reducer::push_rebuilt_params(const VariableIndex& index) {
 void Reducer::autograd_hook(VariableIndex index) {
 std::lock_guard<std::mutex> lock(this->mutex_);
 
-// Carry over thread local state from main thread. This allows for thread-local
-// flags such as profiler enabled to be configure correctly.
+// Carry over thread local state from main thread. This allows for
+// thread-local flags such as profiler enabled to be configure correctly.
 at::ThreadLocalStateGuard g(thread_local_state_);
 // See Note [Skip allreducing local_used_maps_dev]
 if (find_unused_parameters_) {
@@ -970,7 +970,8 @@ void Reducer::prepare_for_backward(
 }
 }
 
-// Warn user about unnecessary perf hit if all parameters were used in forward.
+// Warn user about unnecessary perf hit if all parameters were used in
+// forward.
 if (unused_parameters_.empty()) {
 TORCH_WARN_ONCE(
 "find_unused_parameters=True was specified in DDP constructor, "
@@ -1388,14 +1389,17 @@ void Reducer::ensure_prior_reduction_finished() {
 // The variable `require_finalize_` is true until all gradients
 // have been computed and reduction of all buckets has been kicked off.
 if (require_finalize_) {
-std::string kBaseErrorMsg = "Expected to have finished reduction in the prior iteration before "
+std::string kBaseErrorMsg =
+"Expected to have finished reduction in the prior iteration before "
 "starting a new one. "
 ""
 "This error indicates that your module has parameters that were "
 "not used in producing loss. ";
-std::string kOutputsNotUsedInLossErrorMsg = "making sure all "
+std::string kOutputsNotUsedInLossErrorMsg =
+"making sure all "
 "`forward` function outputs participate in calculating loss. ";
-std::string kDDPBugErrorMsg = "\nIf you already have done the above, then the distributed "
+std::string kDDPBugErrorMsg =
+"\nIf you already have done the above, then the distributed "
 "data parallel module wasn't able to locate the output tensors in the "
 "return value of your module's `forward` function. "
 "Please include the loss function and the structure of the return "
@@ -1405,7 +1409,8 @@
 if (!find_unused_parameters_) {
 // Parameters may have been unused in forward pass, or not all outputs
 // were used in producing loss.
-kBaseErrorMsg += "You can enable unused parameter detection by passing the "
+kBaseErrorMsg +=
+"You can enable unused parameter detection by passing the "
 "keyword argument `find_unused_parameters=True` to "
 "`torch.nn.parallel.DistributedDataParallel`, and by \n";
 kBaseErrorMsg += kOutputsNotUsedInLossErrorMsg;
@@ -1414,7 +1419,8 @@
 // Note that it does not really matter whether unused_parameters_.empty(),
 // since user may have enabled detection but this particular iteration
 // could have used or not used all parameters.
-kBaseErrorMsg += "Since `find_unused_parameters=True` is enabled, this likely "
+kBaseErrorMsg +=
+"Since `find_unused_parameters=True` is enabled, this likely "
 " means that not all `forward` outputs participate in computing loss. You can fix this by ";
 kBaseErrorMsg += kOutputsNotUsedInLossErrorMsg;
 kBaseErrorMsg += kDDPBugErrorMsg;
@@ -1433,8 +1439,8 @@ int Reducer::get_ddp_runtime_logging_sample_rate() {
 
 bool Reducer::should_collect_runtime_stats() {
 if (num_iterations_ > 0 &&
-(num_iterations_ <= 10 ||
-num_iterations_ % get_ddp_runtime_logging_sample_rate() == 0)) {
+(num_iterations_ <= 10 ||
+num_iterations_ % get_ddp_runtime_logging_sample_rate() == 0)) {
 return true;
 }
 return false;
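
The final hunk above only re-indents the condition in should_collect_runtime_stats; the sampling logic (collect stats for the first 10 iterations, then for every sample-rate-th iteration) is unchanged. A small standalone sketch of that predicate, with a hard-coded rate standing in for get_ddp_runtime_logging_sample_rate():

#include <iostream>

// Collect runtime stats for the first 10 iterations, then every
// `sample_rate`-th iteration after that.
bool should_collect(long num_iterations, long sample_rate) {
  return num_iterations > 0 &&
      (num_iterations <= 10 || num_iterations % sample_rate == 0);
}

int main() {
  const long kSampleRate = 100; // illustrative value only
  const long iterations[] = {1, 10, 11, 100, 250, 300};
  for (long i : iterations) {
    std::cout << i << ": " << (should_collect(i, kSampleRate) ? "collect" : "skip") << "\n";
  }
  // Collects at 1, 10, 100, and 300; skips 11 and 250.
  return 0;
}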
