From 50875c78438b3bbdb67bf134f73ac83c4ba798c3 Mon Sep 17 00:00:00 2001 From: Justus Rijjke Date: Thu, 1 Jan 2026 14:18:23 +0100 Subject: [PATCH] Consolidate comparison logic into __init__.py --- src/diffpdf/__init__.py | 30 +++++++++++++++++++++-- src/diffpdf/cli.py | 8 +++---- src/diffpdf/comparators.py | 37 ---------------------------- tests/test_api.py | 28 ++++++++++++++++++---- tests/test_cli.py | 13 ++++++++++ tests/test_comparators.py | 49 -------------------------------------- 6 files changed, 67 insertions(+), 98 deletions(-) delete mode 100644 src/diffpdf/comparators.py delete mode 100644 tests/test_comparators.py diff --git a/src/diffpdf/__init__.py b/src/diffpdf/__init__.py index edb57be..490caba 100644 --- a/src/diffpdf/__init__.py +++ b/src/diffpdf/__init__.py @@ -1,8 +1,11 @@ from importlib.metadata import version from pathlib import Path -from .comparators import compare_pdfs +from .hash_check import check_hash from .logger import setup_logging +from .page_check import check_page_counts +from .text_check import check_text_content +from .visual_check import check_visual_content __version__ = version("diffpdf") @@ -21,7 +24,30 @@ def diffpdf( out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir logger = setup_logging(verbosity, save_log) - return compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger) + logger.debug("Debug logging enabled") + + logger.info("[1/4] Checking file hashes...") + if check_hash(ref_path, actual_path): + logger.info("Files are identical (hash match)") + return True + logger.info("Hashes differ, continuing checks") + + logger.info("[2/4] Checking page counts...") + if not check_page_counts(ref_path, actual_path, logger): + return False + + logger.info("[3/4] Checking text content...") + if not check_text_content(ref_path, actual_path, logger): + return False + + logger.info("[4/4] Checking visual content...") + if not check_visual_content( + ref_path, actual_path, threshold, dpi, out_path, logger + ): + return False + + logger.info("PDFs are equivalent") + return True __all__ = ["diffpdf", "__version__"] diff --git a/src/diffpdf/cli.py b/src/diffpdf/cli.py index 6177332..3d86d30 100644 --- a/src/diffpdf/cli.py +++ b/src/diffpdf/cli.py @@ -3,7 +3,7 @@ import click -from .comparators import compare_pdfs +from . import diffpdf from .logger import setup_logging @@ -41,14 +41,12 @@ def cli( save_log: bool, ) -> None: """Compare two PDF files for structural, textual, and visual differences.""" - logger = setup_logging(verbosity, save_log) - logger.debug("Debug logging enabled") - try: - if compare_pdfs(reference, actual, threshold, dpi, output_dir, logger): + if diffpdf(reference, actual, threshold, dpi, output_dir, verbosity, save_log): sys.exit(0) else: sys.exit(1) except Exception as e: # pragma: no cover + logger = setup_logging(verbosity, save_log) logger.critical(f"Error: {e}", exc_info=True) sys.exit(2) diff --git a/src/diffpdf/comparators.py b/src/diffpdf/comparators.py deleted file mode 100644 index 551dd43..0000000 --- a/src/diffpdf/comparators.py +++ /dev/null @@ -1,37 +0,0 @@ -import logging -from pathlib import Path - -from .hash_check import check_hash -from .page_check import check_page_counts -from .text_check import check_text_content -from .visual_check import check_visual_content - - -def compare_pdfs( - ref: Path, - actual: Path, - threshold: float, - dpi: int, - output_dir: Path | None, - logger: logging.Logger, -) -> bool: - logger.info("[1/4] Checking file hashes...") - if check_hash(ref, actual): - logger.info("Files are identical (hash match)") - return True - logger.info("Hashes differ, continuing checks") - - logger.info("[2/4] Checking page counts...") - if not check_page_counts(ref, actual, logger): - return False - - logger.info("[3/4] Checking text content...") - if not check_text_content(ref, actual, logger): - return False - - logger.info("[4/4] Checking visual content...") - if not check_visual_content(ref, actual, threshold, dpi, output_dir, logger): - return False - - logger.info("PDFs are equivalent") - return True diff --git a/tests/test_api.py b/tests/test_api.py index 0aaaa81..f8e6326 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,13 +1,31 @@ # type: ignore from pathlib import Path +import pytest + from diffpdf import diffpdf TEST_ASSETS_DIR = Path(__file__).parent / "assets" -def test_diffpdf(): - assert diffpdf( - TEST_ASSETS_DIR / "pass/identical-A.pdf", - TEST_ASSETS_DIR / "pass/identical-B.pdf", - ) +@pytest.mark.parametrize( + "ref_pdf_rel,actual_pdf_rel,should_pass", + [ + # Pass cases + ("pass/identical-A.pdf", "pass/identical-B.pdf", True), + ("pass/hash-diff-A.pdf", "pass/hash-diff-B.pdf", True), + ("pass/minor-color-diff-A.pdf", "pass/minor-color-diff-B.pdf", True), + ("pass/multiplatform-diff-A.pdf", "pass/multiplatform-diff-B.pdf", True), + # Fail cases + ("fail/1-letter-diff-A.pdf", "fail/1-letter-diff-B.pdf", False), + ("fail/major-color-diff-A.pdf", "fail/major-color-diff-B.pdf", False), + ("fail/page-count-diff-A.pdf", "fail/page-count-diff-B.pdf", False), + ], +) +def test_api(ref_pdf_rel, actual_pdf_rel, should_pass): + ref_pdf = TEST_ASSETS_DIR / ref_pdf_rel + actual_pdf = TEST_ASSETS_DIR / actual_pdf_rel + + result = diffpdf(ref_pdf, actual_pdf) + + assert result == should_pass diff --git a/tests/test_cli.py b/tests/test_cli.py index e7afb83..07f7be3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -8,6 +8,19 @@ TEST_ASSETS_DIR = Path(__file__).parent / "assets" +def test_cli_with_output_dir(): + runner = CliRunner() + + with runner.isolated_filesystem(): + ref_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-A.pdf") + actual_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-B.pdf") + + result = runner.invoke(cli, [ref_pdf, actual_pdf, "--output-dir", "./diff"]) + + assert result.exit_code == 1 + assert Path("./diff").exists() + + def test_verbose_flag(): runner = CliRunner() result = runner.invoke( diff --git a/tests/test_comparators.py b/tests/test_comparators.py deleted file mode 100644 index bf050ce..0000000 --- a/tests/test_comparators.py +++ /dev/null @@ -1,49 +0,0 @@ -# type: ignore -from pathlib import Path - -import pytest -from click.testing import CliRunner - -from diffpdf.cli import cli - -TEST_ASSETS_DIR = Path(__file__).parent / "assets" - - -@pytest.mark.parametrize( - "ref_pdf_rel,actual_pdf_rel,expected_exit_code", - [ - # Pass cases (exit code 0) - ("pass/identical-A.pdf", "pass/identical-B.pdf", 0), - ("pass/hash-diff-A.pdf", "pass/hash-diff-B.pdf", 0), - ("pass/minor-color-diff-A.pdf", "pass/minor-color-diff-B.pdf", 0), - ("pass/multiplatform-diff-A.pdf", "pass/multiplatform-diff-B.pdf", 0), - # Fail cases (exit code 1) - ("fail/1-letter-diff-A.pdf", "fail/1-letter-diff-B.pdf", 1), - ("fail/major-color-diff-A.pdf", "fail/major-color-diff-B.pdf", 1), - ("fail/page-count-diff-A.pdf", "fail/page-count-diff-B.pdf", 1), - # Critical error cases (exit code 2) - ("nonexistent.pdf", "another.pdf", 2), - ], -) -def test_comparators(ref_pdf_rel, actual_pdf_rel, expected_exit_code): - runner = CliRunner() - - ref_pdf = str(TEST_ASSETS_DIR / ref_pdf_rel) - actual_pdf = str(TEST_ASSETS_DIR / actual_pdf_rel) - - result = runner.invoke(cli, [ref_pdf, actual_pdf]) - - assert result.exit_code == expected_exit_code - - -def test_comparators_with_output_dir(): - runner = CliRunner() - - with runner.isolated_filesystem(): - ref_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-A.pdf") - actual_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-B.pdf") - - result = runner.invoke(cli, [ref_pdf, actual_pdf, "--output-dir", "./diff"]) - - assert result.exit_code == 1 - assert Path("./diff").exists()