@@ -59,14 +59,14 @@ Reducer::Reducer(
   {
     std::set<int> unique_devices;
     for (const auto& v : replicas_[0]) {
-        auto device_idx = int(v.device().index());
-        if (unique_devices.find(device_idx) == unique_devices.end()) {
-          unique_devices.insert(device_idx);
-          if (unique_devices.size() > 1) {
-            is_multi_device_module_ = true;
-            break;
-          }
+      auto device_idx = int(v.device().index());
+      if (unique_devices.find(device_idx) == unique_devices.end()) {
+        unique_devices.insert(device_idx);
+        if (unique_devices.size() > 1) {
+          is_multi_device_module_ = true;
+          break;
         }
+      }
     }
   }
 
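The hunk above only re-indents the device check, but the logic it touches is easy to restate: the constructor walks the tensors of the first replica, records each tensor's device index in a std::set, and flags the module as multi-device as soon as a second distinct index appears. Below is a minimal standalone sketch of that pattern; spans_multiple_devices and device_indices are illustrative names, not part of Reducer.

#include <set>
#include <vector>

// Sketch only: stands in for the loop over replicas_[0] in the hunk above.
// Each entry of device_indices plays the role of one parameter's device index.
bool spans_multiple_devices(const std::vector<int>& device_indices) {
  std::set<int> unique_devices;
  for (int device_idx : device_indices) {
    unique_devices.insert(device_idx);
    if (unique_devices.size() > 1) {
      return true; // a second distinct device is enough; stop early
    }
  }
  return false;
}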
@@ -423,8 +423,8 @@ void Reducer::push_rebuilt_params(const VariableIndex& index) {
 void Reducer::autograd_hook(VariableIndex index) {
   std::lock_guard<std::mutex> lock(this->mutex_);
 
-  // Carry over thread local state from main thread. This allows for thread-local
-  // flags such as profiler enabled to be configured correctly.
+  // Carry over thread local state from main thread. This allows for
+  // thread-local flags such as profiler enabled to be configured correctly.
   at::ThreadLocalStateGuard g(thread_local_state_);
   // See Note [Skip allreducing local_used_maps_dev]
   if (find_unused_parameters_) {
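For context on why that comment matters: autograd_hook fires on autograd engine threads, so the Reducer snapshots the constructing thread's at::ThreadLocalState and re-applies it in the hook with at::ThreadLocalStateGuard, as the context lines show. The following is a minimal sketch of that capture-and-restore pattern, assuming libtorch headers are available; HookOwner and do_work are illustrative names, not Reducer members.

#include <ATen/ThreadLocalState.h>

// Sketch of the pattern used above: capture thread-local flags (profiler
// state, grad mode, ...) on the thread that constructs the object, then
// restore them inside a callback that may run on a different thread.
struct HookOwner {
  // Default construction snapshots the current thread's state, as the
  // Reducer does for its thread_local_state_ member.
  at::ThreadLocalState thread_local_state_;

  void hook() {
    // Re-applies the captured state for the duration of this scope only.
    at::ThreadLocalStateGuard guard(thread_local_state_);
    do_work(); // runs with the constructing thread's thread-local configuration
  }

  void do_work() {}
};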
@@ -970,7 +970,8 @@ void Reducer::prepare_for_backward(
     }
   }
 
-  // Warn user about unnecessary perf hit if all parameters were used in forward.
+  // Warn user about unnecessary perf hit if all parameters were used in
+  // forward.
   if (unused_parameters_.empty()) {
     TORCH_WARN_ONCE(
         "find_unused_parameters=True was specified in DDP constructor, "
@@ -1388,14 +1389,17 @@ void Reducer::ensure_prior_reduction_finished() {
   // The variable `require_finalize_` is true until all gradients
   // have been computed and reduction of all buckets has been kicked off.
   if (require_finalize_) {
-    std::string kBaseErrorMsg = "Expected to have finished reduction in the prior iteration before "
+    std::string kBaseErrorMsg =
+        "Expected to have finished reduction in the prior iteration before "
         "starting a new one. "
         ""
         "This error indicates that your module has parameters that were "
         "not used in producing loss. ";
-    std::string kOutputsNotUsedInLossErrorMsg = "making sure all "
+    std::string kOutputsNotUsedInLossErrorMsg =
+        "making sure all "
         "`forward` function outputs participate in calculating loss. ";
-    std::string kDDPBugErrorMsg = "\nIf you already have done the above, then the distributed "
+    std::string kDDPBugErrorMsg =
+        "\nIf you already have done the above, then the distributed "
         "data parallel module wasn't able to locate the output tensors in the "
         "return value of your module's `forward` function. "
         "Please include the loss function and the structure of the return "
@@ -1405,7 +1409,8 @@ void Reducer::ensure_prior_reduction_finished() {
     if (!find_unused_parameters_) {
       // Parameters may have been unused in forward pass, or not all outputs
       // were used in producing loss.
-      kBaseErrorMsg += "You can enable unused parameter detection by passing the "
+      kBaseErrorMsg +=
+          "You can enable unused parameter detection by passing the "
           "keyword argument `find_unused_parameters=True` to "
           "`torch.nn.parallel.DistributedDataParallel`, and by \n";
       kBaseErrorMsg += kOutputsNotUsedInLossErrorMsg;
@@ -1414,7 +1419,8 @@ void Reducer::ensure_prior_reduction_finished() {
       // Note that it does not really matter whether unused_parameters_.empty(),
       // since user may have enabled detection but this particular iteration
       // could have used or not used all parameters.
-      kBaseErrorMsg += "Since `find_unused_parameters=True` is enabled, this likely "
+      kBaseErrorMsg +=
+          "Since `find_unused_parameters=True` is enabled, this likely "
           "means that not all `forward` outputs participate in computing loss. You can fix this by ";
       kBaseErrorMsg += kOutputsNotUsedInLossErrorMsg;
       kBaseErrorMsg += kDDPBugErrorMsg;
@@ -1433,8 +1439,8 @@ int Reducer::get_ddp_runtime_logging_sample_rate() {
 
 bool Reducer::should_collect_runtime_stats() {
   if (num_iterations_ > 0 &&
-    (num_iterations_ <= 10 ||
-    num_iterations_ % get_ddp_runtime_logging_sample_rate() == 0)) {
+      (num_iterations_ <= 10 ||
+       num_iterations_ % get_ddp_runtime_logging_sample_rate() == 0)) {
     return true;
   }
   return false;