From 41033b022643be0842e3796be0a2fc8af5b7fff2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Oct 2025 04:27:03 +0000
Subject: [PATCH 1/4] Initial plan


From c0d038965f8dc0ee600d22a56dbaf53d3d63105e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Oct 2025 04:38:17 +0000
Subject: [PATCH 2/4] Implement complete tablediff-arrow package with CLI,
 tests, and CI

Co-authored-by: psmman <161755837+psmman@users.noreply.github.com>
---
 .github/workflows/ci.yml        |  76 +++++++++
 .pre-commit-config.yaml         |  31 ++++
 README.md                       | 283 ++++++++++++++++++++++++++++++++
 pyproject.toml                  |  81 +++++++++
 src/tablediff_arrow/__init__.py |   9 +
 src/tablediff_arrow/cli.py      | 176 ++++++++++++++++++++
 src/tablediff_arrow/compare.py  | 213 ++++++++++++++++++++++++
 src/tablediff_arrow/loader.py   |  67 ++++++++
 src/tablediff_arrow/reports.py  | 275 +++++++++++++++++++++++++++++++
 tests/test_compare.py           | 214 ++++++++++++++++++++++++
 tests/test_loader.py            |  97 +++++++++++
 tests/test_reports.py           | 100 +++++++++++
 12 files changed, 1622 insertions(+)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 pyproject.toml
 create mode 100644 src/tablediff_arrow/__init__.py
 create mode 100644 src/tablediff_arrow/cli.py
 create mode 100644 src/tablediff_arrow/compare.py
 create mode 100644 src/tablediff_arrow/loader.py
 create mode 100644 src/tablediff_arrow/reports.py
 create mode 100644 tests/test_compare.py
 create mode 100644 tests/test_loader.py
 create mode 100644 tests/test_reports.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..dbd2967
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,76 @@
+name: CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -e ".[dev]"
+
+    - name: Lint with ruff
+      run: |
+        ruff check src tests
+
+    - name: Check formatting with black
+      run: |
+        black --check src tests
+
+    - name: Type check with mypy
+      run: |
+        mypy src --ignore-missing-imports
+
+    - name: Run tests with pytest
+      run: |
+        pytest tests/ -v --cov=tablediff_arrow --cov-report=xml
+
+    - name: Upload coverage to Codecov
+      if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
+      uses: codecov/codecov-action@v4
+      with:
+        file: ./coverage.xml
+        fail_ci_if_error: false
+
+  build:
+    runs-on: ubuntu-latest
+    needs: test
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.11"
+
+    - name: Install build dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+
+    - name: Build package
+      run: python -m build
+
+    - name: Check package
+      run: |
+        pip install twine
+        twine check dist/*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..d08654b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,31 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: check-json
+      - id: check-toml
+      - id: check-merge-conflict
+      - id: debug-statements
+
+  - repo: https://github.com/psf/black
+    rev: 23.12.1
+    hooks:
+      - id: black
+        language_version: python3.10
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.1.11
+    hooks:
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.8.0
+    hooks:
+      - id: mypy
+        additional_dependencies: [types-all]
+        args: [--ignore-missing-imports]
diff --git a/README.md b/README.md
index 7ff5d7e..78e275b 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,285 @@
 # tablediff-arrow
+
 Fast, file-based diffs for Parquet/CSV/Arrow (local or S3) with keyed comparisons, per-column tolerances, and HTML/CSV reports—built on Apache Arrow.
+
+[![CI](https://github.com/psmman/tablediff-arrow/workflows/CI/badge.svg)](https://github.com/psmman/tablediff-arrow/actions)
+[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+## Features
+
+- 🚀 **Fast**: Built on Apache Arrow for high-performance data processing
+- 📁 **Multiple Formats**: Support for Parquet, CSV, and Arrow IPC files
+- ☁️ **S3 Support**: Read files directly from S3 (optional)
+- 🔑 **Keyed Comparisons**: Compare tables using one or more key columns
+- 🎯 **Numeric Tolerances**: Configure absolute and relative tolerances for numeric columns
+- 📊 **Rich Reports**: Generate HTML and CSV reports with detailed differences
+- 🐍 **Python 3.10+**: Modern Python with type hints and clean APIs
+- 🧪 **Well Tested**: Comprehensive test suite with high coverage
+
+## Installation
+
+```bash
+pip install tablediff-arrow
+```
+
+For S3 support:
+
+```bash
+pip install tablediff-arrow[s3]
+```
+
+For development:
+
+```bash
+pip install -e ".[dev]"
+```
+
+## Quick Start
+
+### Command Line Interface
+
+Compare two Parquet files using `id` as the key column:
+
+```bash
+tablediff left.parquet right.parquet -k id
+```
+
+Compare with numeric tolerance:
+
+```bash
+tablediff left.csv right.csv -k id -t amount:0.01
+```
+
+Generate an HTML report:
+
+```bash
+tablediff left.parquet right.parquet -k id -o report.html
+```
+
+Compare S3 files:
+
+```bash
+tablediff s3://bucket/left.parquet s3://bucket/right.parquet -k id --s3
+```
+
+### Python API
+
+```python
+from tablediff_arrow import TableDiff
+
+# Create a differ with key columns and tolerances
+differ = TableDiff(
+    key_columns=['id'],
+    tolerance={'amount': 0.01},  # Absolute tolerance
+    relative_tolerance={'price': 0.001}  # Relative tolerance (0.1%)
+)
+
+# Compare files
+result = differ.compare_files('left.parquet', 'right.parquet')
+
+# Print summary
+print(result.summary())
+
+# Check if there are differences
+if result.has_differences:
+    print(f"Found {result.changed_rows} changed rows")
+    print(f"Found {result.left_only_rows} rows only in left")
+    print(f"Found {result.right_only_rows} rows only in right")
+
+# Generate reports
+from tablediff_arrow.reports import generate_html_report, generate_csv_report
+
+generate_html_report(result, 'report.html')
+generate_csv_report(result, 'output_dir/', prefix='diff')
+```
+
+## Usage Examples
+
+### Multiple Key Columns
+
+Compare tables using composite keys:
+
+```bash
+tablediff left.parquet right.parquet -k year -k month -k product
+```
+
+```python
+differ = TableDiff(key_columns=['year', 'month', 'product'])
+result = differ.compare_files('left.parquet', 'right.parquet')
+```
+
+### Numeric Tolerances
+
+Use absolute tolerance for monetary values:
+
+```bash
+tablediff left.csv right.csv -k id -t amount:0.01 -t balance:0.001
+```
+
+Use relative tolerance for percentages:
+
+```bash
+tablediff left.csv right.csv -k id -r rate:0.001 -r score:0.01
+```
+
+```python
+differ = TableDiff(
+    key_columns=['id'],
+    tolerance={'amount': 0.01, 'balance': 0.001},
+    relative_tolerance={'rate': 0.001, 'score': 0.01}
+)
+```
+
+### Working with PyArrow Tables
+
+```python
+import pyarrow as pa
+from tablediff_arrow import TableDiff
+
+# Create tables directly
+left = pa.table({'id': [1, 2, 3], 'value': [10, 20, 30]})
+right = pa.table({'id': [1, 2, 3], 'value': [10, 21, 30]})
+
+# Compare
+differ = TableDiff(key_columns=['id'])
+result = differ.compare_tables(left, right)
+
+print(result.summary())
+```
+
+### S3 Files
+
+```python
+import s3fs
+from tablediff_arrow import TableDiff
+
+# Create S3 filesystem
+fs = s3fs.S3FileSystem()
+
+# Compare S3 files
+differ = TableDiff(key_columns=['id'])
+result = differ.compare_files(
+    's3://my-bucket/left.parquet',
+    's3://my-bucket/right.parquet',
+    filesystem=fs
+)
+```
+
+## CLI Options
+
+```
+Usage: tablediff [OPTIONS] LEFT RIGHT
+
+  Compare two tables and generate diff reports.
+
+Arguments:
+  LEFT   Path to the left/source table file (local or s3://)
+  RIGHT  Path to the right/target table file (local or s3://)
+
+Options:
+  -k, --key TEXT              Key column(s) for comparison (required, can be
+                              specified multiple times)
+  -t, --tolerance TEXT        Absolute tolerance for numeric columns
+                              (format: column:value)
+  -r, --relative-tolerance    Relative tolerance for numeric columns
+                              (format: column:value)
+  --left-format [parquet|csv|arrow]
+                              Format of the left file
+  --right-format [parquet|csv|arrow]
+                              Format of the right file
+  -o, --output TEXT           Output file path for HTML report
+  --csv-output PATH           Output directory for CSV reports
+  --s3                        Enable S3 filesystem support
+  --help                      Show this message and exit.
+```
+
+## Output Reports
+
+### HTML Report
+
+The HTML report provides an interactive view of differences:
+
+- Summary statistics (matched, changed, added, removed rows)
+- Color-coded differences table
+- Separate sections for left-only and right-only rows
+- Change counts per column
+
+### CSV Reports
+
+CSV output generates multiple files:
+
+- `{prefix}_summary.csv`: Summary statistics
+- `{prefix}_changes.csv`: Detailed changes with old and new values
+- `{prefix}_left_only.csv`: Rows only in the left table
+- `{prefix}_right_only.csv`: Rows only in the right table
+
+## Development
+
+### Setup
+
+```bash
+# Clone the repository
+git clone https://github.com/psmman/tablediff-arrow.git
+cd tablediff-arrow
+
+# Install with development dependencies
+pip install -e ".[dev]"
+
+# Install pre-commit hooks
+pre-commit install
+```
+
+### Running Tests
+
+```bash
+# Run all tests
+pytest
+
+# Run with coverage
+pytest --cov=tablediff_arrow --cov-report=html
+
+# Run specific test file
+pytest tests/test_compare.py
+```
+
+### Code Quality
+
+```bash
+# Format code
+black src tests
+
+# Lint
+ruff check src tests
+
+# Type check
+mypy src
+```
+
+### Pre-commit Hooks
+
+The project uses pre-commit hooks to ensure code quality:
+
+- trailing-whitespace: Remove trailing whitespace
+- end-of-file-fixer: Ensure files end with a newline
+- check-yaml/json/toml: Validate config files
+- black: Format Python code
+- ruff: Lint Python code
+- mypy: Type checking
+
+## Requirements
+
+- Python 3.10 or higher
+- pyarrow >= 14.0.0
+- pandas >= 2.0.0
+- click >= 8.0.0
+- jinja2 >= 3.0.0
+- s3fs >= 2023.0.0 (optional, for S3 support)
+
+## License
+
+MIT License - see [LICENSE](LICENSE) file for details.
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..6ded538
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,81 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tablediff-arrow"
+version = "0.1.0"
+description = "Fast, file-based diffs for Parquet/CSV/Arrow (local or S3) with keyed comparisons, per-column tolerances, and HTML/CSV reports—built on Apache Arrow."
+readme = "README.md"
+authors = [
+    {name = "Prasenjit Singh", email = "psmman@users.noreply.github.com"}
+]
+license = {text = "MIT"}
+requires-python = ">=3.10"
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+keywords = ["diff", "parquet", "csv", "arrow", "data-comparison"]
+
+dependencies = [
+    "pyarrow>=14.0.0",
+    "pandas>=2.0.0",
+    "click>=8.0.0",
+    "jinja2>=3.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-cov>=4.0.0",
+    "black>=23.0.0",
+    "ruff>=0.1.0",
+    "mypy>=1.0.0",
+    "pre-commit>=3.0.0",
+]
+s3 = [
+    "s3fs>=2023.0.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/psmman/tablediff-arrow"
+Repository = "https://github.com/psmman/tablediff-arrow"
+Issues = "https://github.com/psmman/tablediff-arrow/issues"
+
+[project.scripts]
+tablediff = "tablediff_arrow.cli:main"
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.black]
+line-length = 100
+target-version = ["py310", "py311", "py312"]
+
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "N", "W", "B", "UP"]
+ignore = ["E501"]
+
+[tool.mypy]
+python_version = "3.10"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = false
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = "test_*.py"
+python_classes = "Test*"
+python_functions = "test_*"
+addopts = "-v --cov=tablediff_arrow --cov-report=term-missing --cov-report=html"
diff --git a/src/tablediff_arrow/__init__.py b/src/tablediff_arrow/__init__.py
new file mode 100644
index 0000000..3190fd6
--- /dev/null
+++ b/src/tablediff_arrow/__init__.py
@@ -0,0 +1,9 @@
+"""
+tablediff-arrow: Fast, file-based diffs for Parquet/CSV/Arrow data.
+"""
+
+__version__ = "0.1.0"
+
+from .compare import DiffResult, TableDiff
+
+__all__ = ["TableDiff", "DiffResult", "__version__"]
diff --git a/src/tablediff_arrow/cli.py b/src/tablediff_arrow/cli.py
new file mode 100644
index 0000000..f61ad4c
--- /dev/null
+++ b/src/tablediff_arrow/cli.py
@@ -0,0 +1,176 @@
+"""
+Command-line interface for tablediff-arrow.
+"""
+
+import sys
+
+import click
+
+from .compare import TableDiff
+from .reports import generate_csv_report, generate_html_report
+
+
+@click.command()
+@click.argument("left", type=click.Path(exists=True))
+@click.argument("right", type=click.Path(exists=True))
+@click.option(
+    "--key",
+    "-k",
+    multiple=True,
+    required=True,
+    help="Key column(s) for comparison. Can be specified multiple times.",
+)
+@click.option(
+    "--tolerance",
+    "-t",
+    multiple=True,
+    help='Absolute tolerance for numeric columns in format "column:value". '
+    "Example: --tolerance amount:0.01",
+)
+@click.option(
+    "--relative-tolerance",
+    "-r",
+    multiple=True,
+    help='Relative tolerance for numeric columns in format "column:value". '
+    "Example: --relative-tolerance price:0.001",
+)
+@click.option(
+    "--left-format",
+    type=click.Choice(["parquet", "csv", "arrow"], case_sensitive=False),
+    help="Format of the left file. If not specified, inferred from extension.",
+)
+@click.option(
+    "--right-format",
+    type=click.Choice(["parquet", "csv", "arrow"], case_sensitive=False),
+    help="Format of the right file. If not specified, inferred from extension.",
+)
+@click.option(
+    "--output",
+    "-o",
+    help="Output file path for HTML report. If not specified, prints summary to console.",
+)
+@click.option(
+    "--csv-output",
+    type=click.Path(),
+    help="Output directory for CSV reports. Generates separate files for changes, "
+    "left-only, and right-only rows.",
+)
+@click.option("--s3", is_flag=True, help="Enable S3 filesystem support for reading files from S3.")
+def main(
+    left: str,
+    right: str,
+    key: tuple,
+    tolerance: tuple,
+    relative_tolerance: tuple,
+    left_format: str | None,
+    right_format: str | None,
+    output: str | None,
+    csv_output: str | None,
+    s3: bool,
+):
+    """
+    Compare two tables and generate diff reports.
+
+    LEFT: Path to the left/source table file (local or s3://)
+
+    RIGHT: Path to the right/target table file (local or s3://)
+
+    Examples:
+
+        # Compare two Parquet files using 'id' as key
+        tablediff left.parquet right.parquet -k id
+
+        # Compare with numeric tolerance
+        tablediff left.csv right.csv -k id -t amount:0.01
+
+        # Generate HTML report
+        tablediff left.parquet right.parquet -k id -o report.html
+
+        # Compare S3 files
+        tablediff s3://bucket/left.parquet s3://bucket/right.parquet -k id --s3
+    """
+    # Parse tolerances
+    abs_tolerance = {}
+    for tol in tolerance:
+        try:
+            col, val = tol.split(":", 1)
+            abs_tolerance[col] = float(val)
+        except ValueError:
+            click.echo(
+                f"Error: Invalid tolerance format '{tol}'. Expected 'column:value'", err=True
+            )
+            sys.exit(1)
+
+    rel_tolerance = {}
+    for tol in relative_tolerance:
+        try:
+            col, val = tol.split(":", 1)
+            rel_tolerance[col] = float(val)
+        except ValueError:
+            click.echo(
+                f"Error: Invalid relative tolerance format '{tol}'. Expected 'column:value'",
+                err=True,
+            )
+            sys.exit(1)
+
+    # Setup filesystem for S3
+    filesystem = None
+    if s3 or left.startswith("s3://") or right.startswith("s3://"):
+        try:
+            import s3fs
+
+            filesystem = s3fs.S3FileSystem()
+        except ImportError:
+            click.echo(
+                "Error: s3fs is required for S3 access. "
+                "Install with: pip install tablediff-arrow[s3]",
+                err=True,
+            )
+            sys.exit(1)
+
+    # Create TableDiff instance
+    differ = TableDiff(
+        key_columns=list(key),
+        tolerance=abs_tolerance,
+        relative_tolerance=rel_tolerance,
+    )
+
+    # Perform comparison
+    try:
+        click.echo("Loading tables...")
+        result = differ.compare_files(
+            left,
+            right,
+            left_format=left_format,
+            right_format=right_format,
+            filesystem=filesystem,
+        )
+
+        # Print summary
+        click.echo("\n" + result.summary())
+
+        # Generate HTML report if requested
+        if output:
+            click.echo(f"\nGenerating HTML report: {output}")
+            generate_html_report(result, output)
+            click.echo(f"HTML report written to {output}")
+
+        # Generate CSV reports if requested
+        if csv_output:
+            click.echo(f"\nGenerating CSV reports in: {csv_output}")
+            generate_csv_report(result, csv_output)
+            click.echo(f"CSV reports written to {csv_output}")
+
+        # Exit with non-zero code if differences found
+        if result.has_differences:
+            sys.exit(1)
+        else:
+            sys.exit(0)
+
+    except Exception as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(2)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/tablediff_arrow/compare.py b/src/tablediff_arrow/compare.py
new file mode 100644
index 0000000..eb5ed14
--- /dev/null
+++ b/src/tablediff_arrow/compare.py
@@ -0,0 +1,213 @@
+"""
+Core comparison logic for table diffs with keyed comparisons and numeric tolerances.
+"""
+
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import pandas as pd
+import pyarrow as pa
+
+from .loader import load_table
+
+
+@dataclass
+class DiffResult:
+    """Result of a table comparison."""
+
+    matched_rows: int = 0
+    left_only_rows: int = 0
+    right_only_rows: int = 0
+    changed_rows: int = 0
+    total_changes: int = 0
+    column_changes: dict[str, int] = field(default_factory=dict)
+    differences: pa.Table | None = None
+    left_only: pa.Table | None = None
+    right_only: pa.Table | None = None
+
+    @property
+    def has_differences(self) -> bool:
+        """Check if there are any differences."""
+        return self.left_only_rows > 0 or self.right_only_rows > 0 or self.changed_rows > 0
+
+    def summary(self) -> str:
+        """Return a summary string of the comparison."""
+        lines = [
+            "Table Comparison Summary:",
+            f"  Matched rows: {self.matched_rows}",
+            f"  Changed rows: {self.changed_rows}",
+            f"  Left-only rows: {self.left_only_rows}",
+            f"  Right-only rows: {self.right_only_rows}",
+            f"  Total changes: {self.total_changes}",
+        ]
+        if self.column_changes:
+            lines.append("  Changes by column:")
+            for col, count in sorted(self.column_changes.items()):
+                lines.append(f"    {col}: {count}")
+        return "\n".join(lines)
+
+
+class TableDiff:
+    """
+    Compare two tables with keyed comparisons and numeric tolerances.
+    """
+
+    def __init__(
+        self,
+        key_columns: list[str],
+        tolerance: dict[str, float] | None = None,
+        relative_tolerance: dict[str, float] | None = None,
+    ):
+        """
+        Initialize TableDiff.
+
+        Args:
+            key_columns: List of column names to use as the join key
+            tolerance: Absolute tolerance per column for numeric comparisons
+            relative_tolerance: Relative tolerance per column for numeric comparisons
+        """
+        self.key_columns = key_columns
+        self.tolerance = tolerance or {}
+        self.relative_tolerance = relative_tolerance or {}
+
+    def compare_files(
+        self,
+        left_path: str | Path,
+        right_path: str | Path,
+        left_format: str | None = None,
+        right_format: str | None = None,
+        filesystem=None,
+    ) -> DiffResult:
+        """
+        Compare two files.
+
+        Args:
+            left_path: Path to the left/source file
+            right_path: Path to the right/target file
+            left_format: Format of left file (inferred if None)
+            right_format: Format of right file (inferred if None)
+            filesystem: Optional filesystem for S3 access
+
+        Returns:
+            DiffResult object
+        """
+        left_table = load_table(left_path, left_format, filesystem)
+        right_table = load_table(right_path, right_format, filesystem)
+        return self.compare_tables(left_table, right_table)
+
+    def compare_tables(
+        self,
+        left: pa.Table,
+        right: pa.Table,
+    ) -> DiffResult:
+        """
+        Compare two PyArrow tables.
+
+        Args:
+            left: Left/source table
+            right: Right/target table
+
+        Returns:
+            DiffResult object
+        """
+        # Convert to pandas for easier comparison
+        left_df = left.to_pandas()
+        right_df = right.to_pandas()
+
+        # Validate key columns exist
+        for col in self.key_columns:
+            if col not in left_df.columns:
+                raise ValueError(f"Key column '{col}' not found in left table")
+            if col not in right_df.columns:
+                raise ValueError(f"Key column '{col}' not found in right table")
+
+        # Set index on key columns for comparison
+        left_df = left_df.set_index(self.key_columns)
+        right_df = right_df.set_index(self.key_columns)
+
+        # Find rows only in left, only in right, and in both
+        left_only_idx = left_df.index.difference(right_df.index)
+        right_only_idx = right_df.index.difference(left_df.index)
+        common_idx = left_df.index.intersection(right_df.index)
+
+        result = DiffResult()
+        result.left_only_rows = len(left_only_idx)
+        result.right_only_rows = len(right_only_idx)
+
+        # Store left-only and right-only rows
+        if len(left_only_idx) > 0:
+            result.left_only = pa.Table.from_pandas(left_df.loc[left_only_idx].reset_index())
+        if len(right_only_idx) > 0:
+            result.right_only = pa.Table.from_pandas(right_df.loc[right_only_idx].reset_index())
+
+        # Compare common rows
+        if len(common_idx) > 0:
+            left_common = left_df.loc[common_idx]
+            right_common = right_df.loc[common_idx]
+
+            # Compare each column
+            diff_rows = []
+            column_changes = {}
+
+            for col in left_common.columns:
+                if col not in right_common.columns:
+                    continue
+
+                left_col = left_common[col]
+                right_col = right_common[col]
+
+                # Apply tolerance for numeric columns
+                if pd.api.types.is_numeric_dtype(left_col):
+                    abs_tol = self.tolerance.get(col, 0.0)
+                    rel_tol = self.relative_tolerance.get(col, 0.0)
+
+                    if rel_tol > 0:
+                        # Relative tolerance
+                        diff_mask = ~pd.isna(left_col) & ~pd.isna(right_col)
+                        if diff_mask.any():
+                            rel_diff = (left_col - right_col).abs() / right_col.abs()
+                            diff_mask = diff_mask & (rel_diff > rel_tol)
+                    elif abs_tol > 0:
+                        # Absolute tolerance
+                        diff_mask = (left_col - right_col).abs() > abs_tol
+                    else:
+                        # No tolerance
+                        diff_mask = left_col != right_col
+                        # Handle NaN comparisons
+                        diff_mask = diff_mask & ~(pd.isna(left_col) & pd.isna(right_col))
+                else:
+                    # Non-numeric comparison
+                    diff_mask = left_col != right_col
+                    # Handle NaN/None comparisons
+                    diff_mask = diff_mask & ~(pd.isna(left_col) & pd.isna(right_col))
+
+                num_changes = diff_mask.sum()
+                if num_changes > 0:
+                    column_changes[col] = int(num_changes)
+
+                    # Record differences
+                    for idx in diff_mask[diff_mask].index:
+                        diff_rows.append(
+                            {
+                                **{
+                                    k: idx[i] if isinstance(idx, tuple) else idx
+                                    for i, k in enumerate(self.key_columns)
+                                },
+                                "column": col,
+                                "left_value": left_col.loc[idx],
+                                "right_value": right_col.loc[idx],
+                            }
+                        )
+
+            result.column_changes = column_changes
+            result.total_changes = sum(column_changes.values())
+            result.changed_rows = len(set(tuple(d[k] for k in self.key_columns) for d in diff_rows))
+            result.matched_rows = len(common_idx) - result.changed_rows
+
+            # Create differences table
+            if diff_rows:
+                result.differences = pa.Table.from_pandas(pd.DataFrame(diff_rows))
+        else:
+            result.matched_rows = 0
+
+        return result
diff --git a/src/tablediff_arrow/loader.py b/src/tablediff_arrow/loader.py
new file mode 100644
index 0000000..be5e861
--- /dev/null
+++ b/src/tablediff_arrow/loader.py
@@ -0,0 +1,67 @@
+"""
+Data loading module for reading Parquet, CSV, and Arrow files from local or S3.
+"""
+
+from pathlib import Path
+
+import pyarrow as pa
+import pyarrow.csv as csv
+import pyarrow.parquet as pq
+
+
+def load_table(path: str | Path, file_format: str | None = None, filesystem=None) -> pa.Table:
+    """
+    Load a table from a file path (local or S3).
+
+    Args:
+        path: Path to the file (local path or S3 URI like s3://bucket/key)
+        file_format: File format ('parquet', 'csv', 'arrow'). If None, inferred from extension.
+        filesystem: Optional filesystem object (e.g., s3fs for S3 access)
+
+    Returns:
+        PyArrow Table
+    """
+    path_str = str(path)
+
+    # Infer format from extension if not provided
+    if file_format is None:
+        if path_str.endswith(".parquet") or path_str.endswith(".pq"):
+            file_format = "parquet"
+        elif path_str.endswith(".csv"):
+            file_format = "csv"
+        elif path_str.endswith(".arrow") or path_str.endswith(".feather"):
+            file_format = "arrow"
+        else:
+            raise ValueError(f"Cannot infer file format from path: {path_str}")
+
+    # Handle S3 paths
+    if path_str.startswith("s3://"):
+        if filesystem is None:
+            try:
+                import s3fs
+
+                filesystem = s3fs.S3FileSystem()
+            except ImportError as err:
+                raise ImportError(
+                    "s3fs is required for S3 access. Install with: pip install tablediff-arrow[s3]"
+                ) from err
+
+    # Load based on format
+    if file_format == "parquet":
+        return pq.read_table(path_str, filesystem=filesystem)
+    elif file_format == "csv":
+        if filesystem:
+            with filesystem.open(path_str, "rb") as f:
+                return csv.read_csv(f)
+        else:
+            return csv.read_csv(path_str)
+    elif file_format == "arrow":
+        if filesystem:
+            with filesystem.open(path_str, "rb") as f:
+                with pa.ipc.open_file(f) as reader:
+                    return reader.read_all()
+        else:
+            with pa.ipc.open_file(path_str) as reader:
+                return reader.read_all()
+    else:
+        raise ValueError(f"Unsupported file format: {file_format}")
diff --git a/src/tablediff_arrow/reports.py b/src/tablediff_arrow/reports.py
new file mode 100644
index 0000000..73eaf06
--- /dev/null
+++ b/src/tablediff_arrow/reports.py
@@ -0,0 +1,275 @@
+"""
+Report generation for table diffs (HTML and CSV formats).
+"""
+
+from pathlib import Path
+
+import pandas as pd
+from jinja2 import Template
+
+from .compare import DiffResult
+
+HTML_TEMPLATE = """
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>Table Diff Report</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            margin: 20px;
+            background-color: #f5f5f5;
+        }
+        .container {
+            max-width: 1200px;
+            margin: 0 auto;
+            background-color: white;
+            padding: 20px;
+            border-radius: 8px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        h1 {
+            color: #333;
+            border-bottom: 2px solid #4CAF50;
+            padding-bottom: 10px;
+        }
+        h2 {
+            color: #666;
+            margin-top: 30px;
+        }
+        .summary {
+            background-color: #f9f9f9;
+            padding: 15px;
+            border-radius: 4px;
+            margin: 20px 0;
+        }
+        .summary-item {
+            margin: 8px 0;
+            font-size: 14px;
+        }
+        .summary-item .label {
+            font-weight: bold;
+            display: inline-block;
+            width: 180px;
+        }
+        .summary-item .value {
+            color: #333;
+        }
+        .status {
+            display: inline-block;
+            padding: 4px 12px;
+            border-radius: 4px;
+            font-weight: bold;
+            margin-left: 10px;
+        }
+        .status-match {
+            background-color: #4CAF50;
+            color: white;
+        }
+        .status-diff {
+            background-color: #f44336;
+            color: white;
+        }
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            margin: 20px 0;
+            font-size: 14px;
+        }
+        th, td {
+            padding: 10px;
+            text-align: left;
+            border-bottom: 1px solid #ddd;
+        }
+        th {
+            background-color: #4CAF50;
+            color: white;
+            font-weight: bold;
+        }
+        tr:hover {
+            background-color: #f5f5f5;
+        }
+        .removed {
+            background-color: #ffebee;
+        }
+        .added {
+            background-color: #e8f5e9;
+        }
+        .changed {
+            background-color: #fff3e0;
+        }
+        .column-changes {
+            list-style: none;
+            padding-left: 0;
+        }
+        .column-changes li {
+            padding: 4px 0;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Table Diff Report</h1>
+
+        <div class="summary">
+            <h2>Summary
+                {% if has_differences %}
+                <span class="status status-diff">DIFFERENCES FOUND</span>
+                {% else %}
+                <span class="status status-match">TABLES MATCH</span>
+                {% endif %}
+            </h2>
+            <div class="summary-item">
+                <span class="label">Matched rows:</span>
+                <span class="value">{{ matched_rows }}</span>
+            </div>
+            <div class="summary-item">
+                <span class="label">Changed rows:</span>
+                <span class="value">{{ changed_rows }}</span>
+            </div>
+            <div class="summary-item">
+                <span class="label">Left-only rows:</span>
+                <span class="value">{{ left_only_rows }}</span>
+            </div>
+            <div class="summary-item">
+                <span class="label">Right-only rows:</span>
+                <span class="value">{{ right_only_rows }}</span>
+            </div>
+            <div class="summary-item">
+                <span class="label">Total changes:</span>
+                <span class="value">{{ total_changes }}</span>
+            </div>
+
+            {% if column_changes %}
+            <div class="summary-item">
+                <span class="label">Changes by column:</span>
+                <ul class="column-changes">
+                    {% for col, count in column_changes.items() %}
+                    <li>{{ col }}: {{ count }}</li>
+                    {% endfor %}
+                </ul>
+            </div>
+            {% endif %}
+        </div>
+
+        {% if differences_html %}
+        <h2>Changed Values</h2>
+        {{ differences_html | safe }}
+        {% endif %}
+
+        {% if left_only_html %}
+        <h2>Rows Only in Left Table</h2>
+        {{ left_only_html | safe }}
+        {% endif %}
+
+        {% if right_only_html %}
+        <h2>Rows Only in Right Table</h2>
+        {{ right_only_html | safe }}
+        {% endif %}
+    </div>
+</body>
+</html>
+"""
+
+
+def generate_html_report(
+    result: DiffResult,
+    output_path: str | Path,
+) -> None:
+    """
+    Generate an HTML report from a DiffResult.
+
+    Args:
+        result: The DiffResult to report on
+        output_path: Path to write the HTML report
+    """
+    template = Template(HTML_TEMPLATE)
+
+    # Convert tables to HTML
+    differences_html = None
+    if result.differences is not None:
+        df = result.differences.to_pandas()
+        differences_html = df.to_html(index=False, classes="changed")
+
+    left_only_html = None
+    if result.left_only is not None:
+        df = result.left_only.to_pandas()
+        left_only_html = df.to_html(index=False, classes="removed")
+
+    right_only_html = None
+    if result.right_only is not None:
+        df = result.right_only.to_pandas()
+        right_only_html = df.to_html(index=False, classes="added")
+
+    html = template.render(
+        has_differences=result.has_differences,
+        matched_rows=result.matched_rows,
+        changed_rows=result.changed_rows,
+        left_only_rows=result.left_only_rows,
+        right_only_rows=result.right_only_rows,
+        total_changes=result.total_changes,
+        column_changes=result.column_changes,
+        differences_html=differences_html,
+        left_only_html=left_only_html,
+        right_only_html=right_only_html,
+    )
+
+    Path(output_path).write_text(html, encoding="utf-8")
+
+
+def generate_csv_report(
+    result: DiffResult,
+    output_dir: str | Path,
+    prefix: str = "diff",
+) -> None:
+    """
+    Generate CSV reports from a DiffResult.
+
+    Creates separate CSV files for differences, left-only, and right-only rows.
+
+    Args:
+        result: The DiffResult to report on
+        output_dir: Directory to write CSV files
+        prefix: Prefix for the CSV filenames
+    """
+    output_path = Path(output_dir)
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    if result.differences is not None:
+        df = result.differences.to_pandas()
+        df.to_csv(output_path / f"{prefix}_changes.csv", index=False)
+
+    if result.left_only is not None:
+        df = result.left_only.to_pandas()
+        df.to_csv(output_path / f"{prefix}_left_only.csv", index=False)
+
+    if result.right_only is not None:
+        df = result.right_only.to_pandas()
+        df.to_csv(output_path / f"{prefix}_right_only.csv", index=False)
+
+    # Generate summary CSV
+    summary_data: dict[str, list[str | int]] = {
+        "metric": [
+            "matched_rows",
+            "changed_rows",
+            "left_only_rows",
+            "right_only_rows",
+            "total_changes",
+        ],
+        "value": [
+            result.matched_rows,
+            result.changed_rows,
+            result.left_only_rows,
+            result.right_only_rows,
+            result.total_changes,
+        ],
+    }
+
+    # Add column changes
+    for col, count in result.column_changes.items():
+        summary_data["metric"].append(f"column_{col}_changes")
+        summary_data["value"].append(count)
+
+    summary_df = pd.DataFrame(summary_data)
+    summary_df.to_csv(output_path / f"{prefix}_summary.csv", index=False)
diff --git a/tests/test_compare.py b/tests/test_compare.py
new file mode 100644
index 0000000..9116c3c
--- /dev/null
+++ b/tests/test_compare.py
@@ -0,0 +1,214 @@
+"""
+Tests for the comparison module.
+"""
+
+import pyarrow as pa
+import pytest
+
+from tablediff_arrow.compare import DiffResult, TableDiff
+
+
+@pytest.fixture
+def identical_tables():
+    """Create two identical tables."""
+    table = pa.table(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+    return table, table
+
+
+@pytest.fixture
+def tables_with_changes():
+    """Create tables with some changed values."""
+    left = pa.table(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+    right = pa.table(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "value": [10.0, 21.0, 30.0],  # Changed value for id=2
+        }
+    )
+    return left, right
+
+
+@pytest.fixture
+def tables_with_added_removed():
+    """Create tables with added and removed rows."""
+    left = pa.table(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+    right = pa.table(
+        {
+            "id": [1, 3, 4],  # Removed id=2, added id=4
+            "name": ["Alice", "Charlie", "David"],
+            "value": [10.0, 30.0, 40.0],
+        }
+    )
+    return left, right
+
+
+def test_identical_tables(identical_tables):
+    """Test comparison of identical tables."""
+    left, right = identical_tables
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+
+    assert result.matched_rows == 3
+    assert result.changed_rows == 0
+    assert result.left_only_rows == 0
+    assert result.right_only_rows == 0
+    assert not result.has_differences
+
+
+def test_tables_with_changes(tables_with_changes):
+    """Test comparison of tables with changed values."""
+    left, right = tables_with_changes
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+
+    assert result.matched_rows == 2
+    assert result.changed_rows == 1
+    assert result.left_only_rows == 0
+    assert result.right_only_rows == 0
+    assert result.total_changes == 1
+    assert "value" in result.column_changes
+    assert result.column_changes["value"] == 1
+    assert result.has_differences
+
+
+def test_tables_with_added_removed(tables_with_added_removed):
+    """Test comparison of tables with added and removed rows."""
+    left, right = tables_with_added_removed
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+
+    assert result.matched_rows == 2
+    assert result.changed_rows == 0
+    assert result.left_only_rows == 1
+    assert result.right_only_rows == 1
+    assert result.has_differences
+    assert result.left_only is not None
+    assert result.right_only is not None
+
+
+def test_absolute_tolerance():
+    """Test absolute tolerance for numeric comparisons."""
+    left = pa.table(
+        {
+            "id": [1, 2, 3],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+    right = pa.table(
+        {
+            "id": [1, 2, 3],
+            "value": [10.01, 20.02, 30.00],  # Small differences
+        }
+    )
+
+    # Without tolerance, should find differences
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+    assert result.changed_rows == 2
+
+    # With tolerance, should not find differences
+    differ = TableDiff(key_columns=["id"], tolerance={"value": 0.05})
+    result = differ.compare_tables(left, right)
+    assert result.changed_rows == 0
+
+
+def test_relative_tolerance():
+    """Test relative tolerance for numeric comparisons."""
+    left = pa.table(
+        {
+            "id": [1, 2],
+            "value": [100.0, 1000.0],
+        }
+    )
+    right = pa.table(
+        {
+            "id": [1, 2],
+            "value": [100.5, 1005.0],  # 0.5% and 0.5% difference
+        }
+    )
+
+    # Without tolerance, should find differences
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+    assert result.changed_rows == 2
+
+    # With 1% relative tolerance, should not find differences
+    differ = TableDiff(key_columns=["id"], relative_tolerance={"value": 0.01})
+    result = differ.compare_tables(left, right)
+    assert result.changed_rows == 0
+
+
+def test_multiple_key_columns():
+    """Test comparison with multiple key columns."""
+    left = pa.table(
+        {
+            "year": [2020, 2020, 2021],
+            "month": [1, 2, 1],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+    right = pa.table(
+        {
+            "year": [2020, 2020, 2021],
+            "month": [1, 2, 1],
+            "value": [10.0, 21.0, 30.0],  # Changed value for 2020-02
+        }
+    )
+
+    differ = TableDiff(key_columns=["year", "month"])
+    result = differ.compare_tables(left, right)
+
+    assert result.matched_rows == 2
+    assert result.changed_rows == 1
+    assert result.total_changes == 1
+
+
+def test_missing_key_column():
+    """Test that missing key column raises an error."""
+    left = pa.table({"id": [1, 2], "value": [10.0, 20.0]})
+    right = pa.table({"id": [1, 2], "value": [10.0, 20.0]})
+
+    differ = TableDiff(key_columns=["missing_col"])
+
+    with pytest.raises(ValueError, match="Key column 'missing_col' not found"):
+        differ.compare_tables(left, right)
+
+
+def test_diff_result_summary():
+    """Test DiffResult summary generation."""
+    result = DiffResult(
+        matched_rows=10,
+        changed_rows=2,
+        left_only_rows=1,
+        right_only_rows=1,
+        total_changes=5,
+        column_changes={"col1": 3, "col2": 2},
+    )
+
+    summary = result.summary()
+    assert "Matched rows: 10" in summary
+    assert "Changed rows: 2" in summary
+    assert "Left-only rows: 1" in summary
+    assert "Right-only rows: 1" in summary
+    assert "Total changes: 5" in summary
+    assert "col1: 3" in summary
+    assert "col2: 2" in summary
diff --git a/tests/test_loader.py b/tests/test_loader.py
new file mode 100644
index 0000000..3d364fc
--- /dev/null
+++ b/tests/test_loader.py
@@ -0,0 +1,97 @@
+"""
+Tests for the loader module.
+"""
+
+import tempfile
+from pathlib import Path
+
+import pyarrow as pa
+import pyarrow.csv as csv
+import pyarrow.parquet as pq
+import pytest
+
+from tablediff_arrow.loader import load_table
+
+
+@pytest.fixture
+def sample_table():
+    """Create a sample PyArrow table for testing."""
+    return pa.table(
+        {
+            "id": [1, 2, 3, 4],
+            "name": ["Alice", "Bob", "Charlie", "David"],
+            "value": [10.5, 20.3, 30.7, 40.1],
+        }
+    )
+
+
+def test_load_parquet(sample_table):
+    """Test loading a Parquet file."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "test.parquet"
+        pq.write_table(sample_table, path)
+
+        loaded = load_table(path)
+        assert loaded.equals(sample_table)
+
+
+def test_load_parquet_explicit_format(sample_table):
+    """Test loading a Parquet file with explicit format."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "test.pq"
+        pq.write_table(sample_table, path)
+
+        loaded = load_table(path, file_format="parquet")
+        assert loaded.equals(sample_table)
+
+
+def test_load_csv(sample_table):
+    """Test loading a CSV file."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "test.csv"
+        csv.write_csv(sample_table, path)
+
+        loaded = load_table(path)
+        # CSV doesn't preserve exact types, so compare as pandas
+        assert loaded.to_pandas().equals(sample_table.to_pandas())
+
+
+def test_load_arrow(sample_table):
+    """Test loading an Arrow IPC file."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "test.arrow"
+        with pa.ipc.new_file(path, sample_table.schema) as writer:
+            writer.write_table(sample_table)
+
+        loaded = load_table(path)
+        assert loaded.equals(sample_table)
+
+
+def test_load_feather(sample_table):
+    """Test loading a Feather file (Arrow IPC format)."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "test.feather"
+        with pa.ipc.new_file(path, sample_table.schema) as writer:
+            writer.write_table(sample_table)
+
+        loaded = load_table(path)
+        assert loaded.equals(sample_table)
+
+
+def test_invalid_format():
+    """Test that invalid format raises an error."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "test.unknown"
+
+        with pytest.raises(ValueError, match="Cannot infer file format"):
+            load_table(path)
+
+
+def test_unsupported_format(sample_table):
+    """Test that unsupported format raises an error."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "test.parquet"
+        pq.write_table(sample_table, path)
+
+        with pytest.raises(ValueError, match="Unsupported file format"):
+            load_table(path, file_format="json")
diff --git a/tests/test_reports.py b/tests/test_reports.py
new file mode 100644
index 0000000..d432b71
--- /dev/null
+++ b/tests/test_reports.py
@@ -0,0 +1,100 @@
+"""
+Tests for the reports module.
+"""
+
+import tempfile
+from pathlib import Path
+
+import pandas as pd
+import pyarrow as pa
+import pytest
+
+from tablediff_arrow.compare import TableDiff
+from tablediff_arrow.reports import generate_csv_report, generate_html_report
+
+
+@pytest.fixture
+def sample_diff_result():
+    """Create a sample DiffResult for testing."""
+    left = pa.table(
+        {
+            "id": [1, 2, 3, 4],
+            "value": [10.0, 20.0, 30.0, 40.0],
+        }
+    )
+    right = pa.table(
+        {
+            "id": [1, 2, 3, 5],
+            "value": [10.0, 21.0, 30.0, 50.0],
+        }
+    )
+
+    differ = TableDiff(key_columns=["id"])
+    return differ.compare_tables(left, right)
+
+
+def test_generate_html_report(sample_diff_result):
+    """Test HTML report generation."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_path = Path(tmpdir) / "report.html"
+        generate_html_report(sample_diff_result, output_path)
+
+        assert output_path.exists()
+        html_content = output_path.read_text()
+
+        # Check for key elements in HTML
+        assert "Table Diff Report" in html_content
+        assert "Summary" in html_content
+        assert "Matched rows" in html_content
+        assert str(sample_diff_result.matched_rows) in html_content
+
+
+def test_generate_csv_report(sample_diff_result):
+    """Test CSV report generation."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_dir = Path(tmpdir)
+        generate_csv_report(sample_diff_result, output_dir, prefix="test")
+
+        # Check that summary file was created
+        summary_path = output_dir / "test_summary.csv"
+        assert summary_path.exists()
+
+        # Verify summary content
+        summary_df = pd.read_csv(summary_path)
+        assert "metric" in summary_df.columns
+        assert "value" in summary_df.columns
+        assert "matched_rows" in summary_df["metric"].values
+
+        # Check for changes file if there are changes
+        if sample_diff_result.differences is not None:
+            changes_path = output_dir / "test_changes.csv"
+            assert changes_path.exists()
+
+        # Check for left_only file if there are left-only rows
+        if sample_diff_result.left_only is not None:
+            left_only_path = output_dir / "test_left_only.csv"
+            assert left_only_path.exists()
+
+        # Check for right_only file if there are right-only rows
+        if sample_diff_result.right_only is not None:
+            right_only_path = output_dir / "test_right_only.csv"
+            assert right_only_path.exists()
+
+
+def test_html_report_no_differences():
+    """Test HTML report generation when there are no differences."""
+    left = pa.table({"id": [1, 2], "value": [10.0, 20.0]})
+    right = pa.table({"id": [1, 2], "value": [10.0, 20.0]})
+
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_path = Path(tmpdir) / "report.html"
+        generate_html_report(result, output_path)
+
+        assert output_path.exists()
+        html_content = output_path.read_text()
+
+        # Should indicate tables match
+        assert "TABLES MATCH" in html_content or "Matched rows" in html_content

From b55663336fe2b40b0a9424c33b3e866835196b74 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Oct 2025 04:41:17 +0000
Subject: [PATCH 3/4] Add examples, CLI tests, and documentation (CONTRIBUTING,
 CHANGELOG)

Co-authored-by: psmman <161755837+psmman@users.noreply.github.com>
---
 CHANGELOG.md      |  36 ++++++++
 CONTRIBUTING.md   | 203 ++++++++++++++++++++++++++++++++++++++++++
 examples.py       | 222 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/test_cli.py | 115 ++++++++++++++++++++++++
 4 files changed, 576 insertions(+)
 create mode 100644 CHANGELOG.md
 create mode 100644 CONTRIBUTING.md
 create mode 100644 examples.py
 create mode 100644 tests/test_cli.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..df6bb40
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,36 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.1.0] - 2025-10-13
+
+### Added
+- Initial release of tablediff-arrow
+- Core table comparison functionality with keyed comparisons
+- Support for Parquet, CSV, and Arrow IPC file formats
+- Support for local and S3 file paths
+- Numeric tolerances (absolute and relative) for comparisons
+- HTML report generation with styled output
+- CSV report generation with separate files for changes, left-only, and right-only rows
+- Command-line interface (CLI) with comprehensive options
+- Python library API for programmatic use
+- Comprehensive test suite with 86% coverage
+- Pre-commit hooks for code quality
+- GitHub Actions CI workflow for automated testing
+- Support for Python 3.10+
+- MIT License
+- Documentation and examples
+
+### Features
+- **Fast Performance**: Built on Apache Arrow for efficient data processing
+- **Multiple Formats**: Parquet, CSV, and Arrow IPC support
+- **S3 Support**: Optional S3 filesystem integration
+- **Flexible Comparisons**: Single or multiple key columns
+- **Numeric Tolerances**: Configure absolute and relative tolerances per column
+- **Rich Reports**: Generate HTML and CSV reports with detailed differences
+- **CLI and Library**: Use as a command-line tool or Python library
+
+[0.1.0]: https://github.com/psmman/tablediff-arrow/releases/tag/v0.1.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..e7d6e5c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,203 @@
+# Contributing to tablediff-arrow
+
+Thank you for your interest in contributing to tablediff-arrow! This document provides guidelines and instructions for contributing to the project.
+
+## Getting Started
+
+### Prerequisites
+
+- Python 3.10 or higher
+- Git
+- pip
+
+### Setting Up Development Environment
+
+1. Fork and clone the repository:
+   ```bash
+   git clone https://github.com/YOUR_USERNAME/tablediff-arrow.git
+   cd tablediff-arrow
+   ```
+
+2. Install the package in development mode with all dependencies:
+   ```bash
+   pip install -e ".[dev]"
+   ```
+
+3. Install pre-commit hooks:
+   ```bash
+   pre-commit install
+   ```
+
+## Development Workflow
+
+### Running Tests
+
+Run all tests:
+```bash
+pytest
+```
+
+Run tests with coverage:
+```bash
+pytest --cov=tablediff_arrow --cov-report=html
+```
+
+Run specific test file:
+```bash
+pytest tests/test_compare.py
+```
+
+Run specific test:
+```bash
+pytest tests/test_compare.py::test_identical_tables
+```
+
+### Code Quality
+
+#### Formatting
+
+Format code with Black:
+```bash
+black src tests
+```
+
+#### Linting
+
+Lint code with Ruff:
+```bash
+ruff check src tests
+```
+
+Fix auto-fixable issues:
+```bash
+ruff check --fix src tests
+```
+
+#### Type Checking
+
+Run type checking with mypy:
+```bash
+mypy src --ignore-missing-imports
+```
+
+### Pre-commit Hooks
+
+Pre-commit hooks run automatically when you commit. To run manually:
+```bash
+pre-commit run --all-files
+```
+
+## Making Changes
+
+### Branch Naming
+
+Use descriptive branch names:
+- `feature/add-new-format-support`
+- `fix/handle-nan-values`
+- `docs/update-examples`
+
+### Commit Messages
+
+Follow conventional commit format:
+- `feat: add support for JSON format`
+- `fix: handle NaN values in comparisons`
+- `docs: update README with new examples`
+- `test: add tests for S3 functionality`
+- `refactor: simplify comparison logic`
+
+### Pull Request Process
+
+1. Create a new branch for your changes
+2. Make your changes and add tests
+3. Ensure all tests pass and code is properly formatted
+4. Update documentation if needed
+5. Push your branch and create a pull request
+6. Wait for review and address any feedback
+
+## Code Style Guidelines
+
+### Python Style
+
+- Follow PEP 8
+- Use type hints where appropriate
+- Maximum line length: 100 characters
+- Use meaningful variable and function names
+
+### Documentation
+
+- Add docstrings to all public functions and classes
+- Use Google-style docstrings
+- Update README for user-facing changes
+- Add examples for new features
+
+### Testing
+
+- Write tests for all new features
+- Maintain or improve test coverage
+- Use pytest fixtures for test data
+- Test edge cases and error conditions
+
+## Project Structure
+
+```
+tablediff-arrow/
+├── src/
+│   └── tablediff_arrow/
+│       ├── __init__.py       # Package initialization
+│       ├── cli.py            # Command-line interface
+│       ├── compare.py        # Core comparison logic
+│       ├── loader.py         # Data loading utilities
+│       └── reports.py        # Report generation
+├── tests/
+│   ├── test_cli.py           # CLI tests
+│   ├── test_compare.py       # Comparison tests
+│   ├── test_loader.py        # Loader tests
+│   └── test_reports.py       # Report tests
+├── .github/
+│   └── workflows/
+│       └── ci.yml            # CI/CD configuration
+├── pyproject.toml            # Project configuration
+└── README.md                 # Project documentation
+```
+
+## Adding New Features
+
+### Adding a New File Format
+
+1. Update `loader.py` to handle the new format
+2. Add tests in `tests/test_loader.py`
+3. Update CLI to support format selection
+4. Update documentation
+
+### Adding a New Report Format
+
+1. Create a new function in `reports.py`
+2. Add tests in `tests/test_reports.py`
+3. Update CLI to support new format
+4. Update documentation
+
+## Debugging
+
+### Running with Debug Output
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+### Interactive Testing
+
+```bash
+python -i examples.py
+```
+
+## Getting Help
+
+- Create an issue for bugs or feature requests
+- Check existing issues before creating new ones
+- Provide detailed information in issues
+- Be respectful and constructive
+
+## License
+
+By contributing to tablediff-arrow, you agree that your contributions will be licensed under the MIT License.
diff --git a/examples.py b/examples.py
new file mode 100644
index 0000000..a08a462
--- /dev/null
+++ b/examples.py
@@ -0,0 +1,222 @@
+"""
+Example usage of tablediff-arrow library.
+
+This script demonstrates various ways to use the tablediff-arrow package
+for comparing tables with different formats and options.
+"""
+
+import tempfile
+from pathlib import Path
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+
+from tablediff_arrow import TableDiff
+from tablediff_arrow.reports import generate_csv_report, generate_html_report
+
+
+def example_basic_comparison():
+    """Basic table comparison example."""
+    print("=" * 60)
+    print("Example 1: Basic Table Comparison")
+    print("=" * 60)
+
+    # Create sample tables
+    left = pa.table(
+        {
+            "id": [1, 2, 3, 4],
+            "name": ["Alice", "Bob", "Charlie", "David"],
+            "amount": [100.0, 200.0, 300.0, 400.0],
+        }
+    )
+
+    right = pa.table(
+        {
+            "id": [1, 2, 3, 5],
+            "name": ["Alice", "Bob", "Charlie", "Eve"],
+            "amount": [100.0, 205.0, 300.0, 500.0],
+        }
+    )
+
+    # Compare tables
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+
+    # Print summary
+    print("\n" + result.summary())
+    print()
+
+
+def example_with_tolerance():
+    """Table comparison with numeric tolerance."""
+    print("=" * 60)
+    print("Example 2: Comparison with Numeric Tolerance")
+    print("=" * 60)
+
+    left = pa.table({"id": [1, 2, 3], "value": [100.0, 200.0, 300.0]})
+
+    right = pa.table({"id": [1, 2, 3], "value": [100.01, 200.02, 300.03]})
+
+    # Without tolerance
+    print("\nWithout tolerance:")
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+    print(f"Changed rows: {result.changed_rows}")
+
+    # With absolute tolerance
+    print("\nWith absolute tolerance of 0.05:")
+    differ = TableDiff(key_columns=["id"], tolerance={"value": 0.05})
+    result = differ.compare_tables(left, right)
+    print(f"Changed rows: {result.changed_rows}")
+    print()
+
+
+def example_file_comparison():
+    """Compare files from disk."""
+    print("=" * 60)
+    print("Example 3: File Comparison")
+    print("=" * 60)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmppath = Path(tmpdir)
+
+        # Create test files
+        left = pa.table({"id": [1, 2, 3], "value": [10, 20, 30]})
+        right = pa.table({"id": [1, 2, 3], "value": [10, 21, 30]})
+
+        left_path = tmppath / "left.parquet"
+        right_path = tmppath / "right.parquet"
+
+        pq.write_table(left, left_path)
+        pq.write_table(right, right_path)
+
+        # Compare files
+        differ = TableDiff(key_columns=["id"])
+        result = differ.compare_files(left_path, right_path)
+
+        print("\n" + result.summary())
+        print()
+
+
+def example_html_report():
+    """Generate an HTML report."""
+    print("=" * 60)
+    print("Example 4: HTML Report Generation")
+    print("=" * 60)
+
+    left = pa.table(
+        {
+            "id": [1, 2, 3, 4],
+            "category": ["A", "B", "C", "D"],
+            "value": [100, 200, 300, 400],
+        }
+    )
+
+    right = pa.table(
+        {
+            "id": [1, 2, 3, 5],
+            "category": ["A", "B", "C", "E"],
+            "value": [100, 210, 300, 500],
+        }
+    )
+
+    differ = TableDiff(key_columns=["id"])
+    result = differ.compare_tables(left, right)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        report_path = Path(tmpdir) / "report.html"
+        generate_html_report(result, report_path)
+        print(f"\nHTML report generated at: {report_path}")
+        print(f"Report size: {report_path.stat().st_size} bytes")
+        print()
+
+
+def example_csv_reports():
+    """Generate CSV reports."""
+    print("=" * 60)
+    print("Example 5: CSV Reports Generation")
+    print("=" * 60)
+
+    left = pa.table(
+        {
+            "year": [2020, 2020, 2021],
+            "month": [1, 2, 1],
+            "sales": [1000, 2000, 3000],
+        }
+    )
+
+    right = pa.table(
+        {
+            "year": [2020, 2020, 2021],
+            "month": [1, 2, 1],
+            "sales": [1000, 2100, 3000],
+        }
+    )
+
+    differ = TableDiff(key_columns=["year", "month"])
+    result = differ.compare_tables(left, right)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        generate_csv_report(result, tmpdir, prefix="sales_diff")
+        print("\nCSV reports generated:")
+        for csv_file in Path(tmpdir).glob("*.csv"):
+            print(f"  - {csv_file.name} ({csv_file.stat().st_size} bytes)")
+        print()
+
+
+def example_relative_tolerance():
+    """Compare with relative tolerance."""
+    print("=" * 60)
+    print("Example 6: Relative Tolerance")
+    print("=" * 60)
+
+    left = pa.table(
+        {
+            "id": [1, 2],
+            "small_value": [10.0, 20.0],
+            "large_value": [1000.0, 2000.0],
+        }
+    )
+
+    right = pa.table(
+        {
+            "id": [1, 2],
+            "small_value": [10.1, 20.1],  # 1% difference
+            "large_value": [1010.0, 2020.0],  # 1% difference
+        }
+    )
+
+    # With absolute tolerance
+    print("\nWith absolute tolerance of 0.5:")
+    differ = TableDiff(key_columns=["id"], tolerance={"small_value": 0.5, "large_value": 0.5})
+    result = differ.compare_tables(left, right)
+    print(f"Changed rows: {result.changed_rows}")
+    print(f"Changes: {result.column_changes}")
+
+    # With relative tolerance
+    print("\nWith relative tolerance of 2% (0.02):")
+    differ = TableDiff(
+        key_columns=["id"],
+        relative_tolerance={"small_value": 0.02, "large_value": 0.02},
+    )
+    result = differ.compare_tables(left, right)
+    print(f"Changed rows: {result.changed_rows}")
+    print(f"Changes: {result.column_changes}")
+    print()
+
+
+if __name__ == "__main__":
+    print("\n" + "=" * 60)
+    print("tablediff-arrow Examples")
+    print("=" * 60 + "\n")
+
+    example_basic_comparison()
+    example_with_tolerance()
+    example_file_comparison()
+    example_html_report()
+    example_csv_reports()
+    example_relative_tolerance()
+
+    print("=" * 60)
+    print("All examples completed successfully!")
+    print("=" * 60)
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..d12878d
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,115 @@
+"""
+Tests for the CLI module.
+"""
+
+import tempfile
+from pathlib import Path
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+import pytest
+from click.testing import CliRunner
+
+from tablediff_arrow.cli import main
+
+
+@pytest.fixture
+def sample_files():
+    """Create sample Parquet files for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmppath = Path(tmpdir)
+
+        left = pa.table(
+            {
+                "id": [1, 2, 3, 4],
+                "name": ["Alice", "Bob", "Charlie", "David"],
+                "value": [10, 20, 30, 40],
+            }
+        )
+        right = pa.table(
+            {
+                "id": [1, 2, 3, 5],
+                "name": ["Alice", "Bob", "Charlie", "Eve"],
+                "value": [10, 21, 30, 50],
+            }
+        )
+
+        left_path = tmppath / "left.parquet"
+        right_path = tmppath / "right.parquet"
+
+        pq.write_table(left, left_path)
+        pq.write_table(right, right_path)
+
+        yield str(left_path), str(right_path), tmppath
+
+
+def test_cli_basic(sample_files):
+    """Test basic CLI usage."""
+    left_path, right_path, _ = sample_files
+
+    runner = CliRunner()
+    result = runner.invoke(main, [left_path, right_path, "-k", "id"])
+
+    assert result.exit_code == 1  # Exit code 1 for differences
+    assert "Matched rows: 2" in result.output
+    assert "Changed rows: 1" in result.output
+    assert "Left-only rows: 1" in result.output
+    assert "Right-only rows: 1" in result.output
+
+
+def test_cli_html_report(sample_files):
+    """Test HTML report generation via CLI."""
+    left_path, right_path, tmppath = sample_files
+
+    report_path = tmppath / "report.html"
+    runner = CliRunner()
+    result = runner.invoke(main, [left_path, right_path, "-k", "id", "-o", str(report_path)])
+
+    assert result.exit_code == 1
+    assert report_path.exists()
+    assert "HTML report written to" in result.output
+
+
+def test_cli_csv_reports(sample_files):
+    """Test CSV report generation via CLI."""
+    left_path, right_path, tmppath = sample_files
+
+    csv_dir = tmppath / "csv_reports"
+    runner = CliRunner()
+    result = runner.invoke(main, [left_path, right_path, "-k", "id", "--csv-output", str(csv_dir)])
+
+    assert result.exit_code == 1
+    assert csv_dir.exists()
+    assert (csv_dir / "diff_summary.csv").exists()
+
+
+def test_cli_tolerance(sample_files):
+    """Test CLI with tolerance option."""
+    left_path, right_path, _ = sample_files
+
+    runner = CliRunner()
+    result = runner.invoke(main, [left_path, right_path, "-k", "id", "-t", "value:2.0"])
+
+    assert result.exit_code == 1
+    # With tolerance of 2.0, the change from 20 to 21 should be ignored
+    # But we still have left-only and right-only rows
+
+
+def test_cli_missing_key():
+    """Test CLI with missing required key option."""
+    runner = CliRunner()
+    result = runner.invoke(main, ["left.parquet", "right.parquet"])
+
+    assert result.exit_code != 0
+    assert "Error" in result.output or "Missing option" in result.output
+
+
+def test_cli_help():
+    """Test CLI help command."""
+    runner = CliRunner()
+    result = runner.invoke(main, ["--help"])
+
+    assert result.exit_code == 0
+    assert "Compare two tables" in result.output
+    assert "--key" in result.output
+    assert "--tolerance" in result.output

From ee71bc5e15525b8a64943ec1ac359e4c1434021f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Oct 2025 04:42:53 +0000
Subject: [PATCH 4/4] Add QUICKSTART guide for quick getting started

Co-authored-by: psmman <161755837+psmman@users.noreply.github.com>
---
 QUICKSTART.md | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 QUICKSTART.md

diff --git a/QUICKSTART.md b/QUICKSTART.md
new file mode 100644
index 0000000..d925bfe
--- /dev/null
+++ b/QUICKSTART.md
@@ -0,0 +1,161 @@
+# Quick Start Guide
+
+Get started with tablediff-arrow in minutes!
+
+## Installation
+
+```bash
+pip install tablediff-arrow
+```
+
+## 5-Minute Tutorial
+
+### 1. Compare Two Parquet Files
+
+```bash
+tablediff data1.parquet data2.parquet -k id
+```
+
+### 2. Generate an HTML Report
+
+```bash
+tablediff data1.parquet data2.parquet -k id -o report.html
+```
+
+Open `report.html` in your browser to see a beautiful, styled comparison report.
+
+### 3. Use in Python
+
+```python
+from tablediff_arrow import TableDiff
+import pyarrow.parquet as pq
+
+# Load your data
+left = pq.read_table("data1.parquet")
+right = pq.read_table("data2.parquet")
+
+# Compare
+differ = TableDiff(key_columns=["id"])
+result = differ.compare_tables(left, right)
+
+# Print summary
+print(result.summary())
+```
+
+### 4. Handle Numeric Differences
+
+```bash
+# Allow 1% tolerance for price differences
+tablediff sales_old.csv sales_new.csv -k id -r price:0.01
+```
+
+### 5. Compare CSV Files
+
+```bash
+tablediff left.csv right.csv -k customer_id --csv-output reports/
+```
+
+This generates multiple CSV files:
+- `reports/diff_summary.csv` - Summary statistics
+- `reports/diff_changes.csv` - Detailed changes
+- `reports/diff_left_only.csv` - Rows only in left
+- `reports/diff_right_only.csv` - Rows only in right
+
+## Common Use Cases
+
+### Multiple Key Columns
+
+```bash
+tablediff data1.parquet data2.parquet -k year -k month -k product
+```
+
+### Absolute Tolerance
+
+```bash
+# Allow differences up to 0.01 in the amount column
+tablediff data1.csv data2.csv -k id -t amount:0.01
+```
+
+### S3 Files
+
+```bash
+pip install tablediff-arrow[s3]
+tablediff s3://bucket/data1.parquet s3://bucket/data2.parquet -k id --s3
+```
+
+## Python API Examples
+
+### Basic Comparison
+
+```python
+from tablediff_arrow import TableDiff
+import pyarrow as pa
+
+left = pa.table({'id': [1, 2, 3], 'value': [10, 20, 30]})
+right = pa.table({'id': [1, 2, 3], 'value': [10, 21, 30]})
+
+differ = TableDiff(key_columns=['id'])
+result = differ.compare_tables(left, right)
+
+print(f"Changed rows: {result.changed_rows}")
+print(f"Total changes: {result.total_changes}")
+```
+
+### With Tolerances
+
+```python
+differ = TableDiff(
+    key_columns=['id'],
+    tolerance={'amount': 0.01},           # Absolute tolerance
+    relative_tolerance={'rate': 0.001}    # Relative tolerance (0.1%)
+)
+result = differ.compare_tables(left, right)
+```
+
+### Generate Reports
+
+```python
+from tablediff_arrow.reports import generate_html_report, generate_csv_report
+
+# HTML report
+generate_html_report(result, 'comparison.html')
+
+# CSV reports
+generate_csv_report(result, 'output/', prefix='diff')
+```
+
+## Next Steps
+
+- Read the full [README](README.md) for more details
+- Check out [examples.py](examples.py) for comprehensive examples
+- Read [CONTRIBUTING.md](CONTRIBUTING.md) to contribute
+- See [CHANGELOG.md](CHANGELOG.md) for version history
+
+## Getting Help
+
+- Create an issue on GitHub: https://github.com/psmman/tablediff-arrow/issues
+- Read the documentation in the README
+- Check the examples file for common patterns
+
+## Quick Reference
+
+### CLI Options
+
+```
+-k, --key              Key column(s) for comparison (required, repeatable)
+-t, --tolerance        Absolute tolerance (format: column:value)
+-r, --relative-tol     Relative tolerance (format: column:value)
+-o, --output           HTML report output path
+--csv-output           Directory for CSV reports
+--left-format          Force format for left file (parquet/csv/arrow)
+--right-format         Force format for right file (parquet/csv/arrow)
+--s3                   Enable S3 filesystem support
+```
+
+### Exit Codes
+
+- `0` - Tables match (no differences)
+- `1` - Differences found
+- `2` - Error occurred
+
+Happy diffing! 🚀