Incorporate 25.04 NeMo Patches #13488

Closed
wants to merge 12 commits
Apply isort and black reformatting
Signed-off-by: bdubauski <[email protected]>
bdubauski committed May 8, 2025
commit 1c7dc6360fa7bb5b9f231306b9f3ff68d8732bc1
14 changes: 12 additions & 2 deletions scripts/performance/argument_parser.py
@@ -334,10 +334,20 @@ def bool_arg(arg):
     )
 
     parser.add_argument(
-        "-cps", "--checkpoint_save", type=bool_arg, help="When enabled will trigger checkpoint save operation at the end of training", required=False, default=None
+        "-cps",
+        "--checkpoint_save",
+        type=bool_arg,
+        help="When enabled will trigger checkpoint save operation at the end of training",
+        required=False,
+        default=None,
     )
     parser.add_argument(
-        "-cpl", "--checkpoint_load_path", type=str, help="Path to checkpoint to load prior to training start", required=False, default=None
+        "-cpl",
+        "--checkpoint_load_path",
+        type=str,
+        help="Path to checkpoint to load prior to training start",
+        required=False,
+        default=None,
     )
 
     def list_of_strings(arg):
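For context, not part of the diff: the two new flags rely on the bool_arg converter named in the hunk header. Below is a minimal sketch of how they might behave; the helper body is hypothetical (only its name comes from the diff), and the path value is made up.

import argparse

# Hypothetical stand-in for the bool_arg converter referenced in the hunk
# header above; the real implementation lives in argument_parser.py.
def bool_arg(arg):
    value = str(arg).lower()
    if value in ("true", "1", "yes"):
        return True
    if value in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {arg!r}")

parser = argparse.ArgumentParser()
parser.add_argument("-cps", "--checkpoint_save", type=bool_arg, required=False, default=None)
parser.add_argument("-cpl", "--checkpoint_load_path", type=str, required=False, default=None)

# Because both defaults are None, downstream code can tell "not given"
# apart from an explicit False or empty value.
args = parser.parse_args(["-cps", "true", "-cpl", "/results/ckpt"])
assert args.checkpoint_save is True
assert args.checkpoint_load_path == "/results/ckpt"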
2 changes: 1 addition & 1 deletion scripts/performance/llm/pretrain_nemotron4_15b.py
@@ -74,7 +74,7 @@ def override_recipe_configs(
         fp8_recipe=args.fp8_recipe,
         nccl_communicator_config_path=args.nccl_communicator_config_path,
         save_checkpoint=args.checkpoint_save,
-        load_checkpoint_path=args.checkpoint_load_path
+        load_checkpoint_path=args.checkpoint_load_path,
     )
     recipe = set_exp_logging_configs(
         recipe, "pre_train", "llm", "nemotron", args.tensorboard, args.wandb, args.wandb_prj_name, args.wandb_job_name
2 changes: 1 addition & 1 deletion scripts/performance/llm/pretrain_nemotron4_340b.py
@@ -83,7 +83,7 @@ def override_recipe_configs(
         fp8_recipe=args.fp8_recipe,
         nccl_communicator_config_path=args.nccl_communicator_config_path,
         save_checkpoint=args.checkpoint_save,
-        load_checkpoint_path=args.checkpoint_load_path
+        load_checkpoint_path=args.checkpoint_load_path,
     )
     recipe = set_exp_logging_configs(
         recipe, "pre_train", "llm", "nemotron", args.tensorboard, args.wandb, args.wandb_prj_name, args.wandb_job_name
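Both Nemotron pretrain scripts share the same forwarding pattern: the parsed CLI flags flow from override_recipe_configs into set_primary_perf_configs. A stubbed sketch of that flow, with hypothetical stand-ins for the recipe object and the utils function (the real one configures a NeMo recipe, not a dict):

# Stub standing in for utils.set_primary_perf_configs; only the two
# checkpoint parameters touched by this commit are modeled.
def set_primary_perf_configs(recipe, save_checkpoint=False, load_checkpoint_path=None):
    recipe["save_checkpoint"] = save_checkpoint
    recipe["load_checkpoint_path"] = load_checkpoint_path
    return recipe

class Args:
    # Mimics the argparse namespace built by argument_parser.py.
    checkpoint_save = True
    checkpoint_load_path = "/results/ckpt"  # hypothetical path

args = Args()
recipe = set_primary_perf_configs(
    {},  # dict stands in for the real NeMo recipe object
    save_checkpoint=args.checkpoint_save,
    load_checkpoint_path=args.checkpoint_load_path,  # trailing comma per black
)
print(recipe)  # {'save_checkpoint': True, 'load_checkpoint_path': '/results/ckpt'}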
2 changes: 1 addition & 1 deletion scripts/performance/utils.py
@@ -266,7 +266,7 @@ def set_primary_perf_configs(
     recompute_modules: Optional[List[str]] = None,
     nccl_communicator_config_path: str = None,
     save_checkpoint: Optional[bool] = False,
-    load_checkpoint_path: Optional[str] = None
+    load_checkpoint_path: Optional[str] = None,
 ):
     """Set experiment configs we usually tune for performance of all models."""
 
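Every hunk in this commit is mechanical: black splits long calls onto one argument per line and appends its "magic trailing comma", so that adding a parameter later produces a one-line diff. A small self-contained illustration with a hypothetical function:

def configure(
    save_checkpoint=False,
    load_checkpoint_path=None,  # trailing comma: appending a new parameter
):                              # below touches only one line in the diff
    return save_checkpoint, load_checkpoint_path

print(configure(save_checkpoint=True, load_checkpoint_path="/results/ckpt"))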