Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ Options:
--threshold FLOAT Pixelmatch threshold (0.0-1.0)
--dpi INTEGER Render resolution
--output-dir DIRECTORY Diff image output directory (optional, if not specified no diff images are saved)
-v, --verbose Increase verbosity (-v for INFO, -vv for DEBUG)
--save-log Write log output to log.txt
-v, --verbose Increase verbosity
--version Show the version and exit.
--help Show this message and exit.
```
Expand All @@ -56,7 +55,7 @@ from diffpdf import diffpdf
diffpdf("reference.pdf", "actual.pdf")

# With options (save diff images to ./output directory)
diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbosity=2)
diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbose=True)
```

## Development
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ classifiers = [
]
dependencies = [
"click",
"colorlog",
"pymupdf>=1.23.0",
"pixelmatch-fast>=1.1.0",
"Pillow>=10.0.0",
Expand Down
14 changes: 5 additions & 9 deletions src/diffpdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,13 @@ def diffpdf(
threshold: float = 0.1,
dpi: int = 96,
output_dir: str | Path | None = None,
verbosity: int = 0,
save_log: bool = False,
verbose: bool = False,
) -> bool:
ref_path = Path(reference) if isinstance(reference, str) else reference
actual_path = Path(actual) if isinstance(actual, str) else actual
out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir

logger = setup_logging(verbosity, save_log)
logger.debug("Debug logging enabled")
logger = setup_logging(verbose)

logger.info("[1/4] Checking file hashes...")
if check_hash(ref_path, actual_path):
Expand All @@ -33,17 +31,15 @@ def diffpdf(
logger.info("Hashes differ, continuing checks")

logger.info("[2/4] Checking page counts...")
if not check_page_counts(ref_path, actual_path, logger):
if not check_page_counts(ref_path, actual_path):
return False

logger.info("[3/4] Checking text content...")
if not check_text_content(ref_path, actual_path, logger):
if not check_text_content(ref_path, actual_path):
return False

logger.info("[4/4] Checking visual content...")
if not check_visual_content(
ref_path, actual_path, threshold, dpi, out_path, logger
):
if not check_visual_content(ref_path, actual_path, threshold, dpi, out_path):
return False

logger.info("PDFs are equivalent")
Expand Down
13 changes: 5 additions & 8 deletions src/diffpdf/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,25 @@
@click.option(
"-v",
"--verbose",
"verbosity",
count=True,
help="Increase verbosity (-v for INFO, -vv for DEBUG)",
is_flag=True,
help="Increase verbosity",
)
@click.option("--save-log", is_flag=True, help="Write log output to log.txt")
@click.version_option(package_name="diffpdf")
def cli(
reference: Path,
actual: Path,
threshold: float,
dpi: int,
output_dir: Path | None,
verbosity: int,
save_log: bool,
verbose: bool,
) -> None:
"""Compare two PDF files for structural, textual, and visual differences."""
try:
if diffpdf(reference, actual, threshold, dpi, output_dir, verbosity, save_log):
if diffpdf(reference, actual, threshold, dpi, output_dir, verbose):
sys.exit(0)
else:
sys.exit(1)
except Exception as e: # pragma: no cover
logger = setup_logging(verbosity, save_log)
logger = setup_logging(verbose)
logger.critical(f"Error: {e}", exc_info=True)
sys.exit(2)
29 changes: 4 additions & 25 deletions src/diffpdf/logger.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,18 @@
import logging

import colorlog

LOG_FORMAT = (
"%(asctime)s %(levelname)-8s %(filename)s:%(lineno)d (%(funcName)s): %(message)s"
)
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
LOG_COLORS = {
"DEBUG": "cyan",
"INFO": "green",
"WARNING": "yellow",
"ERROR": "red",
"CRITICAL": "red,bg_white",
}


def setup_logging(verbosity: int, save_log: bool) -> logging.Logger:
if verbosity == 0:
level = logging.WARNING
elif verbosity == 1:
def setup_logging(verbose: bool) -> logging.Logger:
if verbose:
level = logging.INFO
else:
level = logging.DEBUG
level = logging.WARNING

formatter = colorlog.ColoredFormatter(
f"%(log_color)s{LOG_FORMAT}%(reset)s",
datefmt=DATE_FORMAT,
log_colors=LOG_COLORS,
)
formatter = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)

console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
Expand All @@ -36,10 +21,4 @@ def setup_logging(verbosity: int, save_log: bool) -> logging.Logger:
logger.setLevel(level)
logger.addHandler(console_handler)

if save_log: # pragma: no cover
file_formatter = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
file_handler = logging.FileHandler("log.txt")
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)

return logger
3 changes: 2 additions & 1 deletion src/diffpdf/page_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ def get_page_count(pdf_path: Path) -> int:
return count


def check_page_counts(ref: Path, actual: Path, logger: logging.Logger) -> bool:
def check_page_counts(ref: Path, actual: Path) -> bool:
logger = logging.getLogger()
ref_count = get_page_count(ref)
actual_count = get_page_count(actual)

Expand Down
3 changes: 2 additions & 1 deletion src/diffpdf/text_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def generate_diff(
return diff


def check_text_content(ref: Path, actual: Path, logger: logging.Logger) -> bool:
def check_text_content(ref: Path, actual: Path) -> bool:
logger = logging.getLogger()
# Extract text and normalize whitespace (collapse each run to a single space, trim the ends)
ref_text = re.sub(r"\s+", " ", extract_text(ref)).strip()
actual_text = re.sub(r"\s+", " ", extract_text(actual)).strip()
Expand Down
2 changes: 1 addition & 1 deletion src/diffpdf/visual_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def check_visual_content(
threshold: float,
dpi: int,
output_dir: Path | None,
logger: logging.Logger,
) -> bool:
logger = logging.getLogger()
if output_dir is not None:
output_dir.mkdir(parents=True, exist_ok=True)

Expand Down
15 changes: 0 additions & 15 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,3 @@ def test_verbose_flag():
)
assert result.exit_code == 0
assert "INFO" in result.output
assert "DEBUG" not in result.output


def test_double_verbose_flag():
runner = CliRunner()
result = runner.invoke(
cli,
[
str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
"-vv",
],
)
assert result.exit_code == 0
assert "DEBUG" in result.output