From 54a2b29fd35e1417543c9114fa0351c6bc71add0 Mon Sep 17 00:00:00 2001
From: Berend Klein Haneveld
Date: Wed, 19 Nov 2025 11:21:20 +0100
Subject: [PATCH 1/2] Add benchmark and workflow (based on observ's)

---
 .github/workflows/benchmark.yml |  67 +++++++++++++
 .gitignore                      |   1 +
 benchmarks/benchmark.py         | 166 ++++++++++++++++++++++++++++++++
 pyproject.toml                  |   1 +
 4 files changed, 235 insertions(+)
 create mode 100644 .github/workflows/benchmark.yml
 create mode 100644 benchmarks/benchmark.py

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000..aadb053
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,67 @@
+name: Benchmarks
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  benchmark:
+    name: Benchmarks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+
+      - name: Set up Python 3.14
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.14'
+
+      - name: Install dependencies
+        run: uv sync
+
+      # Restore benchmark baseline (read-only for PRs)
+      - name: Restore benchmark baseline
+        uses: actions/cache/restore@v4
+        with:
+          path: .benchmarks
+          key: benchmark-baseline-3.14-${{ runner.os }}
+
+      # On master: save baseline results
+      - name: Run benchmarks and save baseline
+        if: github.ref == 'refs/heads/master'
+        run: |
+          uv run --no-sync pytest benchmarks/benchmark.py \
+            --benchmark-only \
+            --benchmark-autosave \
+            --benchmark-sort=name
+
+      # On master: cache the new baseline results
+      - name: Save benchmark baseline
+        if: github.ref == 'refs/heads/master'
+        uses: actions/cache/save@v4
+        with:
+          path: .benchmarks
+          key: benchmark-baseline-3.14-${{ runner.os }}
+
+      # On PRs: compare against baseline and fail if degraded
+      - name: Run benchmarks and compare
+        if: github.event_name == 'pull_request'
+        run: |
+          if [ -z "$(uv run --no-sync pytest-benchmark list)" ]; then
+            echo "No baseline found, skip the benchmark"
+            exit
+          fi
+
+          uv run --no-sync pytest benchmarks/benchmark.py \
+            --benchmark-only \
+            --benchmark-compare \
+            --benchmark-compare-fail=mean:5% \
+            --benchmark-sort=name
+
diff --git a/.gitignore b/.gitignore
index e674c48..c9dfbba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ __pycache__
 .coverage
 dist
 uv.lock
+.benchmarks/
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
new file mode 100644
index 0000000..2cca2e3
--- /dev/null
+++ b/benchmarks/benchmark.py
@@ -0,0 +1,166 @@
+"""
+Benchmark suite for patchdiff performance testing using pytest-benchmark.
+
+Run benchmarks:
+    uv run pytest benchmarks/benchmark.py --benchmark-only
+
+Save baseline:
+    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-autosave
+
+Compare against baseline:
+    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001
+
+Fail if performance degrades >5%:
+    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001 --benchmark-compare-fail=mean:5%
+"""
+
+import random
+
+import pytest
+
+from patchdiff import apply, diff
+
+# Set seed for reproducibility
+random.seed(42)
+
+
+def generate_random_list(size: int, value_range: int = 1000) -> list[int]:
+    """Generate a random list of integers."""
+    return [random.randint(0, value_range) for _ in range(size)]
+
+
+def generate_similar_lists(
+    size: int, change_ratio: float = 0.1
+) -> tuple[list[int], list[int]]:
+    """
+    Generate two similar lists with specified change ratio.
+
+    Args:
+        size: Size of the lists
+        change_ratio: Ratio of elements that differ (0.0 to 1.0)
+    """
+    list_a = generate_random_list(size)
+    list_b = list_a.copy()
+
+    num_changes = int(size * change_ratio)
+
+    # Make some replacements
+    for _ in range(num_changes // 3):
+        idx = random.randint(0, size - 1)
+        list_b[idx] = random.randint(0, 1000)
+
+    # Make some insertions
+    for _ in range(num_changes // 3):
+        idx = random.randint(0, len(list_b))
+        list_b.insert(idx, random.randint(0, 1000))
+
+    # Make some deletions
+    for _ in range(num_changes // 3):
+        if list_b:
+            idx = random.randint(0, len(list_b) - 1)
+            del list_b[idx]
+
+    return list_a, list_b
+
+
+def generate_nested_dict(depth: int, breadth: int) -> dict | int:
+    """Generate a nested dictionary structure."""
+    if depth == 0:
+        return random.randint(0, 1000)
+
+    result = {}
+    for i in range(breadth):
+        key = f"key_{i}"
+        if random.random() > 0.3:
+            result[key] = generate_nested_dict(depth - 1, breadth)
+        else:
+            result[key] = random.randint(0, 1000)
+    return result
+
+
+# ========================================
+# List Diff Benchmarks
+# ========================================
+
+
+@pytest.mark.benchmark(group="list-diff")
+def test_list_diff_small_10pct(benchmark):
+    """Benchmark: 50 element list with 10% changes."""
+    a, b = generate_similar_lists(50, 0.1)
+    benchmark(diff, a, b)
+
+
+@pytest.mark.benchmark(group="list-diff")
+@pytest.mark.parametrize("change_ratio", [0.05, 0.1, 0.5])
+def test_list_diff_medium(benchmark, change_ratio):
+    """Benchmark: 1000 element list with varying change ratios."""
+    a, b = generate_similar_lists(1000, change_ratio)
+    benchmark(diff, a, b)
+
+
+@pytest.mark.benchmark(group="list-diff-edge")
+def test_list_diff_completely_different(benchmark):
+    """Benchmark: Two completely different 1000 element lists."""
+    a = generate_random_list(1000)
+    b = generate_random_list(1000)
+    benchmark(diff, a, b)
+
+
+@pytest.mark.benchmark(group="list-diff-edge")
+def test_list_diff_identical(benchmark):
+    """Benchmark: Two identical 10000 element lists."""
+    a = generate_random_list(10000)
+    b = a.copy()
+    benchmark(diff, a, b)
+
+
+# ========================================
+# Dict Diff Benchmarks
+# ========================================
+
+
+@pytest.mark.benchmark(group="dict-diff")
+def test_dict_diff_flat_500_keys(benchmark):
+    """Benchmark: Flat dict with 500 keys, 10% changed."""
+    a = {f"key_{i}": i for i in range(500)}
+    b = a.copy()
+    # Change 10%
+    for i in range(50):
+        b[f"key_{i}"] = i + 500
+
+    benchmark(diff, a, b)
+
+
+@pytest.mark.benchmark(group="dict-diff")
+def test_dict_diff_nested(benchmark):
+    """Benchmark: Nested dict with depth=3, breadth=5."""
+    a = generate_nested_dict(3, 5)
+    b = generate_nested_dict(3, 5)
+    benchmark(diff, a, b)
+
+
+# ========================================
+# Mixed Structure Benchmarks
+# ========================================
+
+
+@pytest.mark.benchmark(group="mixed")
+def test_mixed_dict_with_list_values(benchmark):
+    """Benchmark: Dict with 50 keys, each containing a 100-element list."""
+    a = {f"key_{i}": generate_random_list(100) for i in range(50)}
+    b = {f"key_{i}": generate_random_list(100) for i in range(50)}
+    benchmark(diff, a, b)
+
+
+# ========================================
+# Apply Benchmarks
+# ========================================
+
+
+@pytest.mark.benchmark(group="apply")
+def test_apply_list_1000_elements(benchmark):
+    """Benchmark: Apply patch to 1000 element list with 10% changes."""
+    a, b = generate_similar_lists(1000, 0.1)
+    ops, _ = diff(a, b)
+
+    benchmark(apply, a, ops)
diff --git a/pyproject.toml b/pyproject.toml
index 8dacd31..cd35aba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ dev = [
     "pytest",
     "pytest-cov",
     "pytest-watch",
+    "pytest-benchmark",
 ]
 
 [tool.ruff.lint]

From 8c56de6bbe5de9c05b4df3a9621676a6dd9f6a47 Mon Sep 17 00:00:00 2001
From: Berend Klein Haneveld
Date: Wed, 19 Nov 2025 11:40:58 +0100
Subject: [PATCH 2/2] Run the benchmark without comparison

---
 .github/workflows/benchmark.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index aadb053..97623ac 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -55,7 +55,8 @@ jobs:
         if: github.event_name == 'pull_request'
         run: |
           if [ -z "$(uv run --no-sync pytest-benchmark list)" ]; then
-            echo "No baseline found, skip the benchmark"
+            echo "No baseline found, not comparing"
+            uv run --no-sync pytest -v benchmarks/benchmark.py
             exit
           fi
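
Note on reproducing the CI comparison locally: the PR job above only compares when a cached baseline exists, so a rough local equivalent (a sketch, assuming pytest-benchmark autosaves baselines into .benchmarks, as the workflow's cache path suggests) is to save a baseline on the base branch and then compare on the feature branch:

    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-autosave
    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare --benchmark-compare-fail=mean:5%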