From 7388b60c46b3a1f5039ad5db3f6bc22ffa36c13f Mon Sep 17 00:00:00 2001 From: ZeYi Lin <944270057@qq.com> Date: Wed, 14 May 2025 17:00:54 +0800 Subject: [PATCH 1/5] feat: add swanlabcallback --- paddlenlp/trainer/integrations.py | 89 +++++++++++++++++++++ paddlenlp/trainer/training_args.py | 8 +- tests/trainer/test_trainer_visualization.py | 29 +++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py index 2b19e890ce7d..d08d72d9b497 100644 --- a/paddlenlp/trainer/integrations.py +++ b/paddlenlp/trainer/integrations.py @@ -42,6 +42,8 @@ def is_wandb_available(): return False return importlib.util.find_spec("wandb") is not None +def is_swanlab_available(): + return importlib.util.find_spec("swanlab") is not None def is_ray_available(): return importlib.util.find_spec("ray.air") is not None @@ -55,6 +57,8 @@ def get_available_reporting_integrations(): integrations.append("wandb") if is_tensorboardX_available(): integrations.append("tensorboard") + if is_swanlab_available(): + integrations.append("swanlab") return integrations @@ -395,6 +399,90 @@ def on_save(self, args, state, control, **kwargs): self._wandb.log_artifact(artifact, aliases=[f"checkpoint-{state.global_step}"]) +class SwanLabCallback(TrainerCallback): + """ + A [`TrainerCallback`] that logs metrics, media to [Swanlab](https://swanlab.com/). + """ + + def __init__(self): + has_swanlab = is_swanlab_available() + if not has_swanlab: + raise RuntimeError("SwanlabCallback requires swanlab to be installed. Run `pip install swanlab`.") + if has_swanlab: + import swanlab + + self._swanlab = swanlab + + self._initialized = False + + def setup(self, args, state, model, **kwargs): + """ + Setup the optional Swanlab integration. + + One can subclass and override this method to customize the setup if needed. + variables: + Environment: + - **SWANLAB_MODE** (`str`, *optional*, defaults to `"cloud"`): + Whether to use swanlab cloud, local or disabled. Set `SWANLAB_MODE="local"` to use local. Set `SWANLAB_MODE="disabled"` to disable. + - **SWANLAB_PROJECT** (`str`, *optional*, defaults to `"PaddleNLP"`): + Set this to a custom string to store results in a different project. + """ + + if self._swanlab is None: + return + + if args.swanlab_api_key: + self._swanlab.login(api_key=args.swanlab_api_key) + + self._initialized = True + + if state.is_world_process_zero: + logger.info( + 'Automatic Swanlab logging enabled, to disable set os.environ["SWANLAB_MODE"] = "disabled"' + ) + + combined_dict = {**args.to_dict()} + + if hasattr(model, "config") and model.config is not None: + model_config = model.config.to_dict() + combined_dict = {**model_config, **combined_dict} + + trial_name = state.trial_name + init_args = {} + if trial_name is not None: + init_args["name"] = trial_name + init_args["group"] = args.run_name + else: + if not (args.run_name is None or args.run_name == args.output_dir): + init_args["name"] = args.run_name + init_args["dir"] = args.logging_dir + if self._swanlab.run is None: + self._swanlab.init( + project=os.getenv("SWANLAB_PROJECT", "PaddleNLP"), + **init_args, + ) + self._swanlab.config.update(combined_dict, allow_val_change=True) + + def on_train_begin(self, args, state, control, model=None, **kwargs): + if self._swanlab is None: + return + if not self._initialized: + self.setup(args, state, model, **kwargs) + + def on_train_end(self, args, state, control, model=None, tokenizer=None, **kwargs): + if self._swanlab is None: + return + + def on_log(self, args, state, control, model=None, logs=None, **kwargs): + if self._swanlab is None: + return + if not self._initialized: + self.setup(args, state, model) + if state.is_world_process_zero: + logs = rewrite_logs(logs) + self._swanlab.log({**logs, "train/global_step": state.global_step}, step=state.global_step) + + class AutoNLPCallback(TrainerCallback): """ A [`TrainerCallback`] that sends the logs to [`Ray Tune`] for [`AutoNLP`] @@ -423,6 +511,7 @@ def on_evaluate(self, args, state, control, **kwargs): "autonlp": AutoNLPCallback, "wandb": WandbCallback, "tensorboard": TensorBoardCallback, + "swanlab": SwanLabCallback, } diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py index 6f528f939d85..ce3cb39987b6 100644 --- a/paddlenlp/trainer/training_args.py +++ b/paddlenlp/trainer/training_args.py @@ -376,7 +376,7 @@ class TrainingArguments: instance of `Dataset`. report_to (`str` or `List[str]`, *optional*, defaults to `"visualdl"`): The list of integrations to report the results and logs to. - Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`. + Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`/`"swanlab"`. `"none"` for no integrations. ddp_find_unused_parameters (`bool`, *optional*): When using distributed training, the value of the flag `find_unused_parameters` passed to @@ -385,6 +385,8 @@ class TrainingArguments: Weights & Biases (WandB) API key(s) for authentication with the WandB service. wandb_http_proxy (`str`, *optional*): Weights & Biases (WandB) http proxy for connecting with the WandB service. + swanlab_api_key (`str`, *optional*): + Swanlab API key for authentication with the Swanlab service. resume_from_checkpoint (`str`, *optional*): The path to a folder with a valid checkpoint for your model. This argument is not directly used by [`Trainer`], it's intended to be used by your training/evaluation scripts instead. See the [example @@ -888,6 +890,10 @@ class TrainingArguments: default=None, metadata={"help": "Weights & Biases (WandB) http proxy for connecting with the WandB service."}, ) + swanlab_api_key: Optional[str] = field( + default=None, + metadata={"help": "Swanlab API key for authentication with the Swanlab service."}, + ) resume_from_checkpoint: Optional[str] = field( default=None, metadata={"help": "The path to a folder with a valid checkpoint for your model."}, diff --git a/tests/trainer/test_trainer_visualization.py b/tests/trainer/test_trainer_visualization.py index ea5dc4900316..2f0c70ba2160 100644 --- a/tests/trainer/test_trainer_visualization.py +++ b/tests/trainer/test_trainer_visualization.py @@ -25,6 +25,7 @@ TensorBoardCallback, VisualDLCallback, WandbCallback, + SwanLabCallback, ) from tests.trainer.trainer_utils import RegressionModelConfig, RegressionPretrainedModel @@ -65,6 +66,34 @@ def test_wandbcallback(self): os.environ.pop("WANDB_MODE", None) shutil.rmtree(output_dir) +class TestSwanlabCallback(unittest.TestCase): + def test_swanlabcallback(self): + output_dir = tempfile.mkdtemp() + args = TrainingArguments( + output_dir=output_dir, + max_steps=200, + logging_steps=20, + run_name="test_swanlabcallback", + logging_dir=output_dir, + ) + state = TrainerState(trial_name="PaddleNLP") + control = TrainerControl() + config = RegressionModelConfig(a=1, b=1) + model = RegressionPretrainedModel(config) + os.environ["SWANLAB_MODE"] = "disabled" + swanlabcallback = SwanLabCallback() + self.assertFalse(swanlabcallback._initialized) + swanlabcallback.on_train_begin(args, state, control) + self.assertTrue(swanlabcallback._initialized) + for global_step in range(args.max_steps): + state.global_step = global_step + if global_step % args.logging_steps == 0: + log = {"loss": 100 - 0.4 * global_step, "learning_rate": 0.1, "global_step": global_step} + swanlabcallback.on_log(args, state, control, logs=log) + swanlabcallback.on_train_end(args, state, control, model=model) + swanlabcallback._swanlab.finish() + os.environ.pop("SWANLAB_MODE", None) + shutil.rmtree(output_dir) class TestTensorboardCallback(unittest.TestCase): def test_tbcallback(self): From 9b3641edff60d447388972f67d16361707f14188 Mon Sep 17 00:00:00 2001 From: ZeYi Lin <944270057@qq.com> Date: Wed, 14 May 2025 19:03:53 +0800 Subject: [PATCH 2/5] fix run --- paddlenlp/trainer/integrations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py index d08d72d9b497..abbb8348cccd 100644 --- a/paddlenlp/trainer/integrations.py +++ b/paddlenlp/trainer/integrations.py @@ -456,7 +456,7 @@ def setup(self, args, state, model, **kwargs): if not (args.run_name is None or args.run_name == args.output_dir): init_args["name"] = args.run_name init_args["dir"] = args.logging_dir - if self._swanlab.run is None: + if self._swanlab.get_run() is None: self._swanlab.init( project=os.getenv("SWANLAB_PROJECT", "PaddleNLP"), **init_args, From eec30ab3e1253f9e06b4efe43f4b3e1c75d3cd7e Mon Sep 17 00:00:00 2001 From: ZeYi Lin <944270057@qq.com> Date: Wed, 14 May 2025 19:35:48 +0800 Subject: [PATCH 3/5] fix lint --- paddlenlp/trainer/integrations.py | 36 ++++++++++----------- tests/trainer/test_trainer_visualization.py | 4 ++- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py index abbb8348cccd..5ba138189fbd 100644 --- a/paddlenlp/trainer/integrations.py +++ b/paddlenlp/trainer/integrations.py @@ -42,9 +42,11 @@ def is_wandb_available(): return False return importlib.util.find_spec("wandb") is not None + def is_swanlab_available(): return importlib.util.find_spec("swanlab") is not None + def is_ray_available(): return importlib.util.find_spec("ray.air") is not None @@ -410,15 +412,15 @@ def __init__(self): raise RuntimeError("SwanlabCallback requires swanlab to be installed. Run `pip install swanlab`.") if has_swanlab: import swanlab - + self._swanlab = swanlab - + self._initialized = False - + def setup(self, args, state, model, **kwargs): """ Setup the optional Swanlab integration. - + One can subclass and override this method to customize the setup if needed. variables: Environment: @@ -427,26 +429,24 @@ def setup(self, args, state, model, **kwargs): - **SWANLAB_PROJECT** (`str`, *optional*, defaults to `"PaddleNLP"`): Set this to a custom string to store results in a different project. """ - + if self._swanlab is None: return - + if args.swanlab_api_key: self._swanlab.login(api_key=args.swanlab_api_key) - + self._initialized = True - + if state.is_world_process_zero: - logger.info( - 'Automatic Swanlab logging enabled, to disable set os.environ["SWANLAB_MODE"] = "disabled"' - ) - + logger.info('Automatic Swanlab logging enabled, to disable set os.environ["SWANLAB_MODE"] = "disabled"') + combined_dict = {**args.to_dict()} - + if hasattr(model, "config") and model.config is not None: model_config = model.config.to_dict() combined_dict = {**model_config, **combined_dict} - + trial_name = state.trial_name init_args = {} if trial_name is not None: @@ -462,17 +462,17 @@ def setup(self, args, state, model, **kwargs): **init_args, ) self._swanlab.config.update(combined_dict, allow_val_change=True) - + def on_train_begin(self, args, state, control, model=None, **kwargs): if self._swanlab is None: return if not self._initialized: self.setup(args, state, model, **kwargs) - + def on_train_end(self, args, state, control, model=None, tokenizer=None, **kwargs): if self._swanlab is None: return - + def on_log(self, args, state, control, model=None, logs=None, **kwargs): if self._swanlab is None: return @@ -481,7 +481,7 @@ def on_log(self, args, state, control, model=None, logs=None, **kwargs): if state.is_world_process_zero: logs = rewrite_logs(logs) self._swanlab.log({**logs, "train/global_step": state.global_step}, step=state.global_step) - + class AutoNLPCallback(TrainerCallback): """ diff --git a/tests/trainer/test_trainer_visualization.py b/tests/trainer/test_trainer_visualization.py index 2f0c70ba2160..89f1264729b1 100644 --- a/tests/trainer/test_trainer_visualization.py +++ b/tests/trainer/test_trainer_visualization.py @@ -22,10 +22,10 @@ from paddlenlp.trainer import TrainerControl, TrainerState, TrainingArguments from paddlenlp.trainer.integrations import ( + SwanLabCallback, TensorBoardCallback, VisualDLCallback, WandbCallback, - SwanLabCallback, ) from tests.trainer.trainer_utils import RegressionModelConfig, RegressionPretrainedModel @@ -66,6 +66,7 @@ def test_wandbcallback(self): os.environ.pop("WANDB_MODE", None) shutil.rmtree(output_dir) + class TestSwanlabCallback(unittest.TestCase): def test_swanlabcallback(self): output_dir = tempfile.mkdtemp() @@ -95,6 +96,7 @@ def test_swanlabcallback(self): os.environ.pop("SWANLAB_MODE", None) shutil.rmtree(output_dir) + class TestTensorboardCallback(unittest.TestCase): def test_tbcallback(self): output_dir = tempfile.mkdtemp() From a1609a7ac50dd22ca4ce327aa344e5d8586bf900 Mon Sep 17 00:00:00 2001 From: ZeYi Lin <944270057@qq.com> Date: Wed, 14 May 2025 20:13:00 +0800 Subject: [PATCH 4/5] fix requirements dev --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index a69c22200908..7cec0726fe0d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -18,6 +18,7 @@ rouge tiktoken visualdl wandb +swanlab tensorboard tensorboardX modelscope From 98c32428c923b8b51aeba4c50e7129b8d3721980 Mon Sep 17 00:00:00 2001 From: ZeYi Lin <944270057@qq.com> Date: Fri, 16 May 2025 10:12:16 +0800 Subject: [PATCH 5/5] fix url --- paddlenlp/trainer/integrations.py | 5 +---- paddlenlp/trainer/training_args.py | 6 ------ 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py index 5ba138189fbd..f483a846f89e 100644 --- a/paddlenlp/trainer/integrations.py +++ b/paddlenlp/trainer/integrations.py @@ -403,7 +403,7 @@ def on_save(self, args, state, control, **kwargs): class SwanLabCallback(TrainerCallback): """ - A [`TrainerCallback`] that logs metrics, media to [Swanlab](https://swanlab.com/). + A [`TrainerCallback`] that logs metrics, media to [Swanlab](https://swanlab.cn/). """ def __init__(self): @@ -433,9 +433,6 @@ def setup(self, args, state, model, **kwargs): if self._swanlab is None: return - if args.swanlab_api_key: - self._swanlab.login(api_key=args.swanlab_api_key) - self._initialized = True if state.is_world_process_zero: diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py index ce3cb39987b6..7fa9fd7037b7 100644 --- a/paddlenlp/trainer/training_args.py +++ b/paddlenlp/trainer/training_args.py @@ -385,8 +385,6 @@ class TrainingArguments: Weights & Biases (WandB) API key(s) for authentication with the WandB service. wandb_http_proxy (`str`, *optional*): Weights & Biases (WandB) http proxy for connecting with the WandB service. - swanlab_api_key (`str`, *optional*): - Swanlab API key for authentication with the Swanlab service. resume_from_checkpoint (`str`, *optional*): The path to a folder with a valid checkpoint for your model. This argument is not directly used by [`Trainer`], it's intended to be used by your training/evaluation scripts instead. See the [example @@ -890,10 +888,6 @@ class TrainingArguments: default=None, metadata={"help": "Weights & Biases (WandB) http proxy for connecting with the WandB service."}, ) - swanlab_api_key: Optional[str] = field( - default=None, - metadata={"help": "Swanlab API key for authentication with the Swanlab service."}, - ) resume_from_checkpoint: Optional[str] = field( default=None, metadata={"help": "The path to a folder with a valid checkpoint for your model."},