add_SwanLabCallback

JunnYu · JunnYu · commit fc425c4324ba · 2025-09-22T13:35:03.000+08:00
diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py
@@ -47,6 +47,10 @@ def is_ray_available():
     return importlib.util.find_spec("ray.air") is not None
 
 
+def is_swanlab_available():
+    return importlib.util.find_spec("swanlab") is not None  # looks up the module spec only; does not import swanlab
+
+
 def get_available_reporting_integrations():
     integrations = []
     if is_visualdl_available():
@@ -55,7 +59,8 @@ def get_available_reporting_integrations():
         integrations.append("wandb")
     if is_tensorboardX_available():
         integrations.append("tensorboard")
-
+    if is_swanlab_available():
+        integrations.append("swanlab")
     return integrations
 
 
@@ -410,11 +415,147 @@ def on_evaluate(self, args, state, control, **kwargs):
             self.session.report(metrics)
 
 
+class SwanLabCallback(TrainerCallback):
+    """
+    A [`TrainerCallback`] that logs training configuration and metrics to [SwanLab](https://swanlab.cn/).
+    """
+
+    def __init__(self):
+        if not is_swanlab_available():
+            raise RuntimeError("SwanLabCallback requires swanlab to be installed. Run `pip install swanlab`.")
+        import swanlab
+
+        self._swanlab = swanlab
+        self._initialized = False
+        self._log_model = os.getenv("SWANLAB_LOG_MODEL", None)
+
+    def setup(self, args, state, model, **kwargs):
+        """
+        Set up the optional SwanLab (*swanlab*) integration.
+
+        One can subclass and override this method to customize the setup if needed. Find more information
+        [here](https://docs.swanlab.cn/guide_cloud/integration/integration-huggingface-transformers.html).
+
+        You can also override the following environment variables. Find more information about environment
+        variables [here](https://docs.swanlab.cn/en/api/environment-variable.html#environment-variables).
+
+        Environment:
+        - **SWANLAB_API_KEY** (`str`, *optional*, defaults to `None`):
+            Cloud API Key. During login, this environment variable is checked first. If it doesn't exist, the system
+            checks if the user is already logged in. If not, the login process is initiated.
+
+                - If a string is passed to the login interface, this environment variable is ignored.
+                - If the user is already logged in, this environment variable takes precedence over locally stored
+                login information.
+
+        - **SWANLAB_PROJECT** (`str`, *optional*, defaults to `"PaddleNLP"`):
+            Set this to a custom string to store results in a different project. If not specified, the project
+            name `PaddleNLP` is used.
+
+        - **SWANLAB_LOG_DIR** (`str`, *optional*, defaults to `args.logging_dir`):
+            Storage path for log files when running in local mode. The callback passes it to SwanLab as
+            `logdir`, and only does so when `args.logging_dir` is not `None`.
+
+        - **SWANLAB_MODE** (`Literal["local", "cloud", "disabled"]`, *optional*, defaults to `cloud`):
+            SwanLab's parsing mode, which involves callbacks registered by the operator. Currently, there are three modes:
+            local, cloud, and disabled. Note: Case-sensitive. Find more information
+            [here](https://docs.swanlab.cn/en/api/py-init.html#swanlab-init)
+
+        - **SWANLAB_LOG_MODEL** (`str`, *optional*, defaults to `None`):
+            SwanLab does not currently support the save mode functionality. This feature will be available in a
+            future release. Until then, setting this variable only triggers a warning on save and at train end.
+
+        - **SWANLAB_WEB_HOST** (`str`, *optional*, defaults to `None`):
+            Web address for the SwanLab cloud environment for private version (it's free)
+
+        - **SWANLAB_API_HOST** (`str`, *optional*, defaults to `None`):
+            API address for the SwanLab cloud environment for private version (it's free)
+
+        """
+        self._initialized = True
+
+        if state.is_world_process_zero:
+            logger.info('Automatic SwanLab logging enabled, to disable set os.environ["SWANLAB_MODE"] = "disabled"')
+            combined_dict = {**args.to_dict()}
+
+            if hasattr(model, "config") and model.config is not None:
+                model_config = model.config if isinstance(model.config, dict) else model.config.to_dict()
+                combined_dict = {**model_config, **combined_dict}
+            if hasattr(model, "lora_config") and model.lora_config is not None:
+                lora_config = model.lora_config if isinstance(model.lora_config, dict) else model.lora_config.to_dict()
+                combined_dict = {"lora_config": lora_config, **combined_dict}
+            trial_name = state.trial_name
+            init_args = {}
+            if trial_name is not None and args.run_name is not None:
+                init_args["experiment_name"] = f"{args.run_name}-{trial_name}"
+            elif args.run_name is not None:
+                init_args["experiment_name"] = args.run_name
+            elif trial_name is not None:
+                init_args["experiment_name"] = trial_name
+            init_args["project"] = os.getenv("SWANLAB_PROJECT", "PaddleNLP")
+            if args.logging_dir is not None:
+                init_args["logdir"] = os.getenv("SWANLAB_LOG_DIR", args.logging_dir)
+
+            if self._swanlab.get_run() is None:
+                self._swanlab.init(**init_args)
+            # show paddlenlp logo!
+            self._swanlab.config["FRAMEWORK"] = "paddlenlp"
+            # add config parameters (run may have been created manually)
+            self._swanlab.config.update(combined_dict)
+
+    def on_train_begin(self, args, state, control, model=None, **kwargs):
+        if not self._initialized:
+            self.setup(args, state, model, **kwargs)
+
+    def on_train_end(self, args, state, control, model=None, processing_class=None, **kwargs):
+        if self._log_model is not None and self._initialized and state.is_world_process_zero:
+            logger.warning(
+                "SwanLab does not currently support the save mode functionality. "
+                "This feature will be available in a future release."
+            )
+
+    def on_log(self, args, state, control, model=None, logs=None, **kwargs):
+        """Log `logs` at `state.global_step`; `single_value_scalars` keys get a `single_value/` prefix."""
+        single_value_scalars = [
+            "train_runtime",
+            "train_samples_per_second",
+            "train_steps_per_second",
+            "train_loss",
+            "total_flos",
+        ]
+
+        if not self._initialized:
+            self.setup(args, state, model)
+        if state.is_world_process_zero:
+            logs = logs or {}  # `logs` defaults to None; fall back to an empty payload
+            for k, v in logs.items():
+                if k in single_value_scalars:
+                    self._swanlab.log({f"single_value/{k}": v}, step=state.global_step)
+            non_scalar_logs = rewrite_logs({k: v for k, v in logs.items() if k not in single_value_scalars})
+            self._swanlab.log({**non_scalar_logs, "train/global_step": state.global_step}, step=state.global_step)
+
+    def on_save(self, args, state, control, **kwargs):
+        if self._log_model is not None and self._initialized and state.is_world_process_zero:
+            logger.warning(
+                "SwanLab does not currently support the save mode functionality. "
+                "This feature will be available in a future release."
+            )
+
+    def on_predict(self, args, state, control, metrics, model=None, **kwargs):
+        """Log prediction `metrics` to SwanLab, running `setup` first if it has not run yet."""
+        if not self._initialized:
+            self.setup(args, state, model, **kwargs)
+        if state.is_world_process_zero:
+            metrics = rewrite_logs(metrics)
+            self._swanlab.log(metrics)
+
+
 INTEGRATION_TO_CALLBACK = {
     "visualdl": VisualDLCallback,
     "autonlp": AutoNLPCallback,
     "wandb": WandbCallback,
     "tensorboard": TensorBoardCallback,
+    "swanlab": SwanLabCallback,  # same name that get_available_reporting_integrations() appends
 }
 
 
diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py
@@ -382,7 +382,7 @@ class TrainingArguments:
             instance of `Dataset`.
         report_to (`str` or `List[str]`, *optional*, defaults to `"visualdl"`):
             The list of integrations to report the results and logs to.
-            Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`.
+            Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`/`"swanlab"`.
             `"none"` for no integrations.
         ddp_find_unused_parameters (`bool`, *optional*):
             When using distributed training, the value of the flag `find_unused_parameters` passed to
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -31,4 +31,5 @@ wget
 huggingface_hub>=0.19.2
 tiktoken
 tokenizers<=0.20.3; python_version<="3.8"
-tokenizers>=0.21,<0.22; python_version>"3.8"
+tokenizers>=0.21,<0.22; python_version>"3.8"
+swanlab[dashboard]
diff --git a/requirements.txt b/requirements.txt
@@ -30,3 +30,4 @@ ml_dtypes
 tokenizers<=0.20.3; python_version<="3.8"
 tokenizers>=0.21,<0.22; python_version>"3.8"
 omegaconf
+swanlab[dashboard]