From 9717d1232570820dd9b4bc2e3a0b26ceab11b7de Mon Sep 17 00:00:00 2001
From: HAOCHENYE <21724054@zju.edu.cn>
Date: Fri, 21 Nov 2025 07:20:42 +0000
Subject: [PATCH 1/2] [Fix] Add `quantization_config` when saving fp8 weight

---
 xtuner/v1/model/base.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/xtuner/v1/model/base.py b/xtuner/v1/model/base.py
index db4d4c717..df729c23c 100644
--- a/xtuner/v1/model/base.py
+++ b/xtuner/v1/model/base.py
@@ -178,7 +178,7 @@ def hf_config(self) -> PretrainedConfig | None:
         """HuggingFace configuration."""
         return None
 
-    def save_hf(self, hf_path: str | Path):
+    def save_hf(self, hf_path: str | Path, dtype: torch.dtype = torch.bfloat16):
         """Save the configuration to a HuggingFace-compatible format.
 
         Args:
@@ -188,7 +188,20 @@ def save_hf(self, hf_path: str | Path):
         if self.hf_config is None:
             raise NotImplementedError("The `hf_config` property must be implemented to save in HuggingFace format.")
 
-        self.hf_config.save_pretrained(hf_path)
+        if dtype not in {torch.bfloat16, torch.float8_e4m3fn}:
+            raise NotImplementedError(f"Saving dtype {dtype} is not supported yet.")
+
+        hf_config = self.hf_config
+        if dtype is torch.float8_e4m3fn:
+            hf_config.quantization_config = {
+                "activation_scheme": "dynamic",
+                "fmt": "e4m3",
+                "quant_method": "fp8",
+                "scale_fmt": "ue8m0",
+                "weight_block_size": [128, 128],
+            }
+
+        hf_config.save_pretrained(hf_path)
 
 
 class ModelOutputs(TypedDict):
@@ -919,7 +932,7 @@ def _save_hf(
             raise RuntimeError("Internal Error, both self.config.hf_config and self._hf_path are None")
 
         if self.config.hf_config is not None:
-            self.config.save_hf(hf_dir)
+            self.config.save_hf(hf_dir, dtype=save_dtype)
         else:  # if self._hf_path is not None:
             for file in cast(Path, self._hf_path).iterdir():
                 if file.suffix != ".safetensors":

From 32186fff55f263fe23b2da254e62530a074a62b0 Mon Sep 17 00:00:00 2001
From: HAOCHENYE <21724054@zju.edu.cn>
Date: Fri, 21 Nov 2025 07:22:22 +0000
Subject: [PATCH 2/2] [Fix] Saving tokenizer in `dcp_to_hf.py`

---
 .dev_scripts/dcp_to_hf.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.dev_scripts/dcp_to_hf.py b/.dev_scripts/dcp_to_hf.py
index a466b2907..0c1e8cec9 100644
--- a/.dev_scripts/dcp_to_hf.py
+++ b/.dev_scripts/dcp_to_hf.py
@@ -1,5 +1,6 @@
 from xtuner.v1.model import get_model_config_from_hf
 from xtuner.v1.model.moe.moe import MoEConfig
+from transformers import AutoTokenizer
 from cyclopts import App, Parameter
 from pathlib import Path
 import torch.distributed as dist
@@ -39,6 +40,12 @@ def dcp_to_hf(
            help="Path to the DCP checkpoint, //checkpoints/ckpt-step-6"
         ),
     ],
+    tokenizer_path: Annotated[
+        Path,
+        Parameter(
+            help="Path to the tokenizer folder, usually the same as the hf_path"
+        ),
+    ],
     hf_path: Annotated[
         Path | None,
         Parameter(
@@ -52,6 +59,7 @@
         ),
     ] = "bf16",
 ):
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
     dist.init_process_group(backend="cuda:nccl,cpu:gloo")
     torch.serialization.add_safe_globals(
         [
@@ -98,6 +106,9 @@
     else:
         model.save_hf(hf_path, save_dtype=torch.float8_e4m3fn)
 
+    if dist.get_rank() == 0:
+        tokenizer.save_pretrained(hf_path)
+
 
 if __name__ == "__main__":
     cli()
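
The FP8 branch added to `save_hf` only changes what gets written to `config.json`: when `torch.float8_e4m3fn` is requested, a block-wise FP8 (e4m3) `quantization_config` entry is attached to the HuggingFace config before `save_pretrained` runs. A minimal standalone sketch of that behavior, assuming a bare `PretrainedConfig` stand-in and a made-up output directory (the real code operates on `self.hf_config`):

import torch
from transformers import PretrainedConfig

hf_config = PretrainedConfig()  # stand-in for `self.hf_config` in the patch
dtype = torch.float8_e4m3fn     # the only other supported value is torch.bfloat16

if dtype not in {torch.bfloat16, torch.float8_e4m3fn}:
    raise NotImplementedError(f"Saving dtype {dtype} is not supported yet.")

if dtype is torch.float8_e4m3fn:
    # Same block-wise FP8 metadata the patch writes into config.json.
    hf_config.quantization_config = {
        "activation_scheme": "dynamic",
        "fmt": "e4m3",
        "quant_method": "fp8",
        "scale_fmt": "ue8m0",
        "weight_block_size": [128, 128],
    }

hf_config.save_pretrained("./fp8-export-demo")  # hypothetical output directory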
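
With both patches applied, a converted checkpoint directory should carry the FP8 `quantization_config` in its `config.json` (for fp8 exports) and the tokenizer files written by rank 0. A quick sanity check, assuming `hf_path` below is replaced with the directory `dcp_to_hf.py` actually produced:

from transformers import AutoConfig, AutoTokenizer

hf_path = "./converted-hf-checkpoint"  # placeholder for the dcp_to_hf.py output directory

config = AutoConfig.from_pretrained(hf_path)
# Expected to print the fp8 block for fp8 exports, and None for bf16 exports.
print(getattr(config, "quantization_config", None))

# Succeeds only if the tokenizer was saved alongside the weights.
tokenizer = AutoTokenizer.from_pretrained(hf_path)
print(type(tokenizer).__name__)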