diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index d12e01544..aaecf9810 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,5 +1,15 @@
 name: ete_test
+
+permissions:
+  contents: write
+  pages: write
+  id-token: write
+
 on:
+  pull_request:
+    branches:
+      - "main"
+      - "refactor"
   workflow_dispatch:
     inputs:
       repo_org:
@@ -19,18 +29,40 @@ jobs:
   ete_test:
     if: ${{!cancelled() }}
     runs-on: [h_cluster_ete]
+    permissions:
+      contents: write
+      pages: write
+      id-token: write
     steps:
       - name: Clean workdir
         run: sudo git clean -ffdx
       - name: Clone repository
         uses: actions/checkout@v2
-        with:
-          repository: ${{ github.event.inputs.repo_org || 'InternLM/xtuner' }}
-          ref: ${{github.event.inputs.repo_ref || 'main'}}
+        #with:
+        #repository: ${{ github.event.inputs.repo_org || 'InternLM/xtuner' }}
+        #ref: ${{github.event.inputs.repo_ref || 'main'}}
       - name: run-test
         run: |
           source /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/miniconda3/bin/activate
           conda activate clusterx
           conda env list
           unset HTTP_PROXY;unset HTTPS_PROXY;unset http_proxy;unset https_proxy;
-          pytest autotest/test_all.py -m all -n 1 -vv --run_id ${{ github.run_id }}
+          pytest autotest/test_all.py::test_all[qwen3-sft] -m all -n 1 -vv --run_id ${{ github.run_id }}
+
+      - name: Upload Artifacts
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: ${{ github.workspace }}/${{ github.run_id }}
+          if-no-files-found: ignore
+          retention-days: 7
+          name: xtuner-e2e-${{ github.run_id }}
+
+      - name: Deploy to GitHub Pages
+        if: ${{ !cancelled() }}
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          token: ${{ github.token }}
+          branch: gh-pages
+          folder: ./${{ github.run_id }}
+          target-folder: ${{ github.run_id }}
diff --git a/autotest/module/train.py b/autotest/module/train.py
index 410ccf1fe..db6495e76 100644
--- a/autotest/module/train.py
+++ b/autotest/module/train.py
@@ -56,7 +56,7 @@ def validate(config):
     )
     cur_path = os.path.join(get_latest_subdir(work_dir), "logs/exp_tracking/rank0/tracker.jsonl")
     check_metrics = config.get("assert_info", {}).get("check_metrics", {})
-    return check_result(base_path, cur_path, check_metrics)
+    return check_result(config["case_name"], base_path, cur_path, check_metrics)
 
 def pre_action(config=None):
     action_info = config.get("pre_action", None)
diff --git a/autotest/utils/check_metric.py b/autotest/utils/check_metric.py
index cc2b6d57d..2c3ec0ec5 100644
--- a/autotest/utils/check_metric.py
+++ b/autotest/utils/check_metric.py
@@ -1,8 +1,12 @@
 import json
 import logging
+import os
+import shutil
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
 from statistics import mean
-
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
 )
@@ -21,8 +25,76 @@ def extract_value(file, metrics):
 
     return total_step, metric_all
 
 
+def plot_all(case_name, check_metric, base_metrics, cur_metrics, output_root: Path):
+    metric_list = list(check_metric.keys())
+    n_plots = len(metric_list)
+    n_cols = int(np.ceil(np.sqrt(n_plots)))
+    n_rows = int(np.ceil(n_plots / n_cols))
+    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3))
+    axes = np.array(axes).flatten()
+
+    for i, ax in enumerate(axes):
+        if i < n_plots:
+            x_base = np.arange(len(base_metrics[metric_list[i]]))
+            x_current = np.arange(len(cur_metrics[metric_list[i]]))
+            ax.plot(
+                x_base,
+                base_metrics[metric_list[i]],
+                "r--",
+                label="Base",
+                marker="x",
+                markersize=4,
+            )
+            ax.plot(
+                x_current,
+                cur_metrics[metric_list[i]],
+                "b-",
+                label="Current",
+                marker="o",
+                markersize=4,
+            )
+            ax.set_title(f"{metric_list[i].replace('/', '_')}_comparison")
+            ax.set_xlabel("Step")
+            ax.set_ylabel("Value")
+            ax.legend()
+            ax.grid(True, linestyle="--", alpha=0.7)
+        else:
+            ax.axis("off")
+    fig.suptitle(f"{case_name}_metrics_comparison", fontsize=16)
+    plt.tight_layout()
+    plt.savefig(output_root / f"{case_name}_comparison.png")
+    plt.close()
+
+
+def write_to_summary(case_name, base_jsonl, cur_jsonl):
+
+    summary_file = os.environ.get('GITHUB_STEP_SUMMARY', './tmp.md')
+    with open(summary_file, 'a') as f:
+        f.write(f"## Metrics comparison plot for {case_name}\n")
+        f.write('