diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index d12e01544..aaecf9810 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,5 +1,15 @@
 name: ete_test
+
+permissions:
+  contents: write
+  pages: write
+  id-token: write
+
 on:
+  pull_request:
+    branches:
+      - "main"
+      - "refactor"
   workflow_dispatch:
     inputs:
       repo_org:
@@ -19,18 +29,40 @@ jobs:
   ete_test:
     if: ${{!cancelled() }}
     runs-on: [h_cluster_ete]
+    permissions:
+      contents: write
+      pages: write
+      id-token: write
     steps:
       - name: Clean workdir
         run: sudo git clean -ffdx
       - name: Clone repository
        uses: actions/checkout@v2
-        with:
-          repository: ${{ github.event.inputs.repo_org || 'InternLM/xtuner' }}
-          ref: ${{github.event.inputs.repo_ref || 'main'}}
+        #with:
+          #repository: ${{ github.event.inputs.repo_org || 'InternLM/xtuner' }}
+          #ref: ${{github.event.inputs.repo_ref || 'main'}}
       - name: run-test
         run: |
           source /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/miniconda3/bin/activate
           conda activate clusterx
           conda env list
           unset HTTP_PROXY;unset HTTPS_PROXY;unset http_proxy;unset https_proxy;
-          pytest autotest/test_all.py -m all -n 1 -vv --run_id ${{ github.run_id }}
+          pytest autotest/test_all.py::test_all[qwen3-sft] -m all -n 1 -vv --run_id ${{ github.run_id }}
+
+      - name: Upload Artifacts
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: ${{ github.workspace }}/${{ github.run_id }}
+          if-no-files-found: ignore
+          retention-days: 7
+          name: xtuner-e2e-${{ github.run_id }}
+
+      - name: Deploy to GitHub Pages
+        if: ${{ !cancelled() }}
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          token: ${{ github.token }}
+          branch: gh-pages
+          folder: ./${{ github.run_id }}
+          target-folder: ${{ github.run_id }}
diff --git a/autotest/module/train.py b/autotest/module/train.py
index 410ccf1fe..db6495e76 100644
--- a/autotest/module/train.py
+++ b/autotest/module/train.py
@@ -56,7 +56,7 @@ def validate(config):
     )
     cur_path = os.path.join(get_latest_subdir(work_dir), "logs/exp_tracking/rank0/tracker.jsonl")
     check_metrics = config.get("assert_info", {}).get("check_metrics", {})
-    return check_result(base_path, cur_path, check_metrics)
+    return check_result(config["case_name"], base_path, cur_path, check_metrics)
 
 def pre_action(config=None):
     action_info = config.get("pre_action", None)
diff --git a/autotest/utils/check_metric.py b/autotest/utils/check_metric.py
index cc2b6d57d..2c3ec0ec5 100644
--- a/autotest/utils/check_metric.py
+++ b/autotest/utils/check_metric.py
@@ -1,8 +1,12 @@
 import json
 import logging
+import os
+import shutil
+
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
 from statistics import mean
-
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
 )
@@ -21,8 +25,76 @@ def extract_value(file, metrics):
     return total_step, metric_all
 
 
+def plot_all(case_name, check_metric, base_metrics, cur_metrics, output_root: Path):
+    metric_list = list(check_metric.keys())
+    n_plots = len(metric_list)
+    n_cols = int(np.ceil(np.sqrt(n_plots)))
+    n_rows = int(np.ceil(n_plots / n_cols))
+    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3))
+    axes = np.array(axes).flatten()
+
+    for i, ax in enumerate(axes):
+        if i < n_plots:
+            x_base = np.arange(len(base_metrics[metric_list[i]]))
+            x_current = np.arange(len(cur_metrics[metric_list[i]]))
+            ax.plot(
+                x_base,
+                base_metrics[metric_list[i]],
+                "r--",
+                label="Base",
+                marker="x",
+                markersize=4,
+            )
+            ax.plot(
+                x_current,
+                cur_metrics[metric_list[i]],
+                "b-",
+                label="Current",
+                marker="o",
+                markersize=4,
+            )
+            ax.set_title(f"{metric_list[i].replace('/', '_')}_comparison")
+            ax.set_xlabel("Step")
+            ax.set_ylabel("Value")
+            ax.legend()
+            ax.grid(True, linestyle="--", alpha=0.7)
+        else:
+            ax.axis("off")
+    fig.suptitle(f"{case_name}_metrics_comparison", fontsize=16)
+    plt.tight_layout()
+    plt.savefig(output_root / f"{case_name}_comparison.png")
+    plt.close()
+
+
+def write_to_summary(case_name, base_jsonl, cur_jsonl):
+
+    summary_file = os.environ.get('GITHUB_STEP_SUMMARY', './tmp.md')
+    with open(summary_file, 'a') as f:
+        f.write(f"## {case_name} metrics comparison\n")
+        f.write('<div align="center">\n')
+        f.write(f'<img src="{case_name}_comparison.png">\n')
+        f.write('</div>\n<br>\n')
+        f.write(f'<details>\n<summary>📊 Click to view metric data for case {case_name} (baseline first, then current run)</summary>\n\n')
-def check_result(base_path, cur_path, check_metric):
+    for json_f in [base_jsonl, cur_jsonl]:
+        with open(json_f, 'r', encoding='utf-8') as f:
+            lines = [line.strip() for line in f if line.strip()]
+
+        md_content = '```json\n'
+        for i, line in enumerate(lines, 1):
+            md_content += f'{line}\n'
+
+        md_content += '```\n\n'
+
+
+        with open(summary_file, 'a', encoding='utf-8') as f:
+            f.write(md_content)
+    with open(summary_file, 'a') as f:
+        f.write('</details>\n')
+
+
+def check_result(case_name, base_path, cur_path, check_metric):
     fail_metric = {}
     check_metric = check_metric
     metric_list = list(check_metric.keys())
@@ -32,6 +104,12 @@ def check_result(base_path, cur_path, check_metric):
             f"current steps is not equal to base steps, current steps: {cur_steps}, base steps: {base_steps}"
         )
 
+    output_path = Path(f"../{os.environ['GITHUB_RUN_ID']}")
+    output_path.mkdir(parents=True, exist_ok=True)
+    plot_all(case_name, check_metric, base_metrics, cur_metrics, output_path)
+    shutil.copytree(output_path, f"./{os.environ['GITHUB_RUN_ID']}", dirs_exist_ok=True)
+    write_to_summary(case_name, base_path, cur_path)
+
     for metric, threshold in check_metric.items():
         max_error = 0.0
         max_error_idx = 0
@@ -75,4 +153,4 @@ def check_result(base_path, cur_path, check_metric):
         return result, f"Some metric check failed,{fail_metric}"
 
 if __name__ == "__main__":
-    print(check_result("./base//tracker.jsonl","./current/tracker.jsonl",{"grad_norm":0.000001,"loss/reduced_llm_loss":0.000001,"lr":0,"memory/max_memory_GB":0.2,"runtime_info/tgs":0.05,"runtime_info/text_tokens":0}))
+    print(check_result("qwen3-sft", "./base/tracker.jsonl", "./current/tracker.jsonl", {"grad_norm":0.000001,"loss/reduced_llm_loss":0.000001,"lr":0,"memory/max_memory_GB":0.2,"runtime_info/tgs":0.05,"runtime_info/text_tokens":0}))
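
For reviewers who want to exercise the new plotting and summary path without triggering the workflow, something along the following lines should work locally. It is a rough sketch, not part of this PR: the tracker.jsonl schema, the faked GITHUB_RUN_ID / GITHUB_STEP_SUMMARY values, and the import path are assumptions.

```python
# Local sketch (assumptions flagged inline): fabricate two identical tracker.jsonl
# files and run the new check_result() signature end to end, so the comparison PNG
# and the step-summary markdown can be inspected without a CI run.
import json
import os
from pathlib import Path

# GitHub Actions normally injects these; fake them for a local run.
os.environ.setdefault("GITHUB_RUN_ID", "local-run")
os.environ.setdefault("GITHUB_STEP_SUMMARY", "./tmp.md")

# Assumes the repo root is on PYTHONPATH so autotest/ is importable as a package.
from autotest.utils.check_metric import check_result


def write_tracker(path: str) -> None:
    # Assumed tracker.jsonl shape: one flat JSON object per step containing the
    # metric names referenced by the check_metric thresholds below.
    with open(path, "w", encoding="utf-8") as f:
        for step in range(3):
            f.write(json.dumps({"grad_norm": 0.5 + 0.1 * step, "lr": 1e-5}) + "\n")


Path("base").mkdir(exist_ok=True)
Path("current").mkdir(exist_ok=True)
write_tracker("base/tracker.jsonl")
write_tracker("current/tracker.jsonl")

# Plots land in ../$GITHUB_RUN_ID and ./$GITHUB_RUN_ID; the markdown goes to ./tmp.md.
print(check_result("qwen3-sft", "base/tracker.jsonl", "current/tracker.jsonl",
                   {"grad_norm": 0.000001, "lr": 0}))
```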