From 9717d1232570820dd9b4bc2e3a0b26ceab11b7de Mon Sep 17 00:00:00 2001
From: HAOCHENYE <21724054@zju.edu.cn>
Date: Fri, 21 Nov 2025 07:20:42 +0000
Subject: [PATCH 1/2] [Fix] Add `quantization_config` when saving fp8 weight

---
 xtuner/v1/model/base.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/xtuner/v1/model/base.py b/xtuner/v1/model/base.py
index db4d4c717..df729c23c 100644
--- a/xtuner/v1/model/base.py
+++ b/xtuner/v1/model/base.py
@@ -178,7 +178,7 @@ def hf_config(self) -> PretrainedConfig | None:
         """HuggingFace configuration."""
         return None
 
-    def save_hf(self, hf_path: str | Path):
+    def save_hf(self, hf_path: str | Path, dtype: torch.dtype = torch.bfloat16):
         """Save the configuration to a HuggingFace-compatible format.
 
         Args:
@@ -188,7 +188,20 @@ def save_hf(self, hf_path: str | Path):
         if self.hf_config is None:
             raise NotImplementedError("The `hf_config` property must be implemented to save in HuggingFace format.")
 
-        self.hf_config.save_pretrained(hf_path)
+        if dtype not in {torch.bfloat16, torch.float8_e4m3fn}:
+            raise NotImplementedError(f"Saving dtype {dtype} is not supported yet.")
+
+        hf_config = self.hf_config
+        if dtype is torch.float8_e4m3fn:
+            hf_config.quantization_config = {
+                "activation_scheme": "dynamic",
+                "fmt": "e4m3",
+                "quant_method": "fp8",
+                "scale_fmt": "ue8m0",
+                "weight_block_size": [128, 128],
+            }
+
+        hf_config.save_pretrained(hf_path)
 
 
 class ModelOutputs(TypedDict):
@@ -919,7 +932,7 @@ def _save_hf(
             raise RuntimeError("Internal Error, both self.config.hf_config and self._hf_path are None")
 
         if self.config.hf_config is not None:
-            self.config.save_hf(hf_dir)
+            self.config.save_hf(hf_dir, dtype=save_dtype)
         else:  # if self._hf_path is not None:
             for file in cast(Path, self._hf_path).iterdir():
                 if file.suffix != ".safetensors":

From 32186fff55f263fe23b2da254e62530a074a62b0 Mon Sep 17 00:00:00 2001
From: HAOCHENYE <21724054@zju.edu.cn>
Date: Fri, 21 Nov 2025 07:22:22 +0000
Subject: [PATCH 2/2] [Fix] Saving tokenizer in `dcp_to_hf.py`

---
 .dev_scripts/dcp_to_hf.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.dev_scripts/dcp_to_hf.py b/.dev_scripts/dcp_to_hf.py
index a466b2907..0c1e8cec9 100644
--- a/.dev_scripts/dcp_to_hf.py
+++ b/.dev_scripts/dcp_to_hf.py
@@ -1,5 +1,6 @@
 from xtuner.v1.model import get_model_config_from_hf
 from xtuner.v1.model.moe.moe import MoEConfig
+from transformers import AutoTokenizer
 from cyclopts import App, Parameter
 from pathlib import Path
 import torch.distributed as dist
@@ -39,6 +40,12 @@ def dcp_to_hf(
            help="Path to the DCP checkpoint, //checkpoints/ckpt-step-6"
         ),
     ],
+    tokenizer_path: Annotated[
+        Path,
+        Parameter(
+            help="Path to the tokenizer folder, usually the same as the hf_path"
+        ),
+    ],
     hf_path: Annotated[
         Path | None,
         Parameter(
@@ -52,6 +59,7 @@
         ),
     ] = "bf16",
 ):
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
     dist.init_process_group(backend="cuda:nccl,cpu:gloo")
     torch.serialization.add_safe_globals(
         [
@@ -98,6 +106,9 @@
     else:
         model.save_hf(hf_path, save_dtype=torch.float8_e4m3fn)
 
+    if dist.get_rank() == 0:
+        tokenizer.save_pretrained(hf_path)
+
 
 if __name__ == "__main__":
     cli()
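
The FP8 branch added to `save_hf` only changes what gets written to `config.json`: when `torch.float8_e4m3fn` is requested, a block-wise FP8 (e4m3) `quantization_config` entry is attached to the HuggingFace config before `save_pretrained` runs. A minimal standalone sketch of that behavior, assuming a bare `PretrainedConfig` stand-in and a made-up output directory (the real code operates on `self.hf_config`):

import torch
from transformers import PretrainedConfig

hf_config = PretrainedConfig()  # stand-in for `self.hf_config` in the patch
dtype = torch.float8_e4m3fn     # the only other supported value is torch.bfloat16

if dtype not in {torch.bfloat16, torch.float8_e4m3fn}:
    raise NotImplementedError(f"Saving dtype {dtype} is not supported yet.")

if dtype is torch.float8_e4m3fn:
    # Same block-wise FP8 metadata the patch writes into config.json.
    hf_config.quantization_config = {
        "activation_scheme": "dynamic",
        "fmt": "e4m3",
        "quant_method": "fp8",
        "scale_fmt": "ue8m0",
        "weight_block_size": [128, 128],
    }

hf_config.save_pretrained("./fp8-export-demo")  # hypothetical output directory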
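
With both patches applied, a converted checkpoint directory should carry the FP8 `quantization_config` in its `config.json` (for fp8 exports) and the tokenizer files written by rank 0. A quick sanity check, assuming `hf_path` below is replaced with the directory `dcp_to_hf.py` actually produced:

from transformers import AutoConfig, AutoTokenizer

hf_path = "./converted-hf-checkpoint"  # placeholder for the dcp_to_hf.py output directory

config = AutoConfig.from_pretrained(hf_path)
# Expected to print the fp8 block for fp8 exports, and None for bf16 exports.
print(getattr(config, "quantization_config", None))

# Succeeds only if the tokenizer was saved alongside the weights.
tokenizer = AutoTokenizer.from_pretrained(hf_path)
print(type(tokenizer).__name__)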