Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,57 @@ $ autorepro scan --json
"detected": [],
"languages": {}
}

# Enhanced scanning with depth control
$ autorepro scan --depth 0
Detected: python
- python -> pyproject.toml

$ autorepro scan --depth 2
Detected: node, python
- node -> package.json
- python -> pyproject.toml

# Filtering with ignore patterns
$ autorepro scan --depth 2 --ignore 'node_modules/**' --ignore 'dist/**'
Detected: python
- python -> pyproject.toml

# Respecting .gitignore rules
$ autorepro scan --respect-gitignore
Detected: python
- python -> pyproject.toml

# JSON with file samples
$ autorepro scan --json --show 3
{
"detected": ["python"],
"languages": {
"python": {
"score": 4,
"reasons": [...],
"files_sample": ["./pyproject.toml", "./main.py", "./utils.py"]
}
}
}
```

**Status:** `scan` is implemented with weighted scoring system and dual output formats (text/JSON).
**Status:** `scan` is implemented with a weighted scoring system, dual output formats (text/JSON), and enhanced hierarchical scanning capabilities.

**Scan Options:**
- `--json`: Output in JSON format with scores and detailed reasons
- `--show-scores`: Add score lines to text output (ignored with --json)
- `--depth N`: Maximum depth to scan (0 for root only, default: unlimited)
- `--ignore PATTERN`: Ignore files/directories matching pattern (repeatable)
- `--respect-gitignore`: Respect .gitignore rules when scanning
- `--show N`: Number of sample files per language to include in JSON output (default: 5)

**Enhanced Scanning Features:**
- **Hierarchical scanning**: Control scan depth with `--depth` parameter
- **Pattern-based filtering**: Use `--ignore` to exclude files/directories by glob patterns
- **Gitignore integration**: `--respect-gitignore` honors .gitignore rules including negation patterns (`!pattern`)
- **File sampling**: JSON output includes `files_sample` array with up to N sample files per language
- **Stable ordering**: Sample files are sorted deterministically for consistent results

**Weighted Scoring System:**
- **Lock files (weight 4)**: `pnpm-lock.yaml`, `yarn.lock`, `npm-shrinkwrap.json`, `package-lock.json`, `go.sum`, `Cargo.lock`
Expand All @@ -233,9 +277,10 @@ $ autorepro scan --json
- **Source files (weight 1)**: `*.py`, `*.go`, `*.rs`, `*.java`, `*.cs`, `*.js`, `*.ts`, etc.

**Scan Behavior:**
- **Root-only**: Scans only the current directory (non-recursive)
- **Deterministic ordering**: Languages and reasons are sorted alphabetically
- **Configurable depth**: `--depth 0` scans the root only, `--depth N` scans N levels deep, and omitting the flag scans to unlimited depth
- **Deterministic ordering**: Languages, reasons, and file samples are sorted alphabetically
- **Score accumulation**: Multiple indicators for the same language add their weights together
- **Filtering integration**: Ignored files don't contribute to detection scores or language presence
- **Exit code 0**: Always succeeds, even with no detections

**Supported Languages:**
Expand Down
100 changes: 83 additions & 17 deletions autorepro/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,27 @@ def _setup_scan_parser(subparsers) -> argparse.ArgumentParser:
action="store_true",
help="Show scores in text output (only effective without --json)",
)
scan_parser.add_argument(
"--depth",
type=int,
help="Maximum depth to scan (0 for root only, default: unlimited)",
)
scan_parser.add_argument(
"--ignore",
action="append",
default=[],
help="Ignore files/directories matching pattern (can be specified multiple times)",
)
scan_parser.add_argument(
"--respect-gitignore",
action="store_true",
help="Respect .gitignore rules when scanning",
)
scan_parser.add_argument(
"--show",
type=int,
help="Number of sample files per language to include in JSON output (default: 5)",
)
scan_parser.add_argument(
"-q",
"--quiet",
Expand Down Expand Up @@ -489,12 +510,28 @@ def create_parser() -> argparse.ArgumentParser:
@time_execution(log_threshold=0.5)
@handle_errors({}, default_return=1, log_errors=True)
@log_operation("language detection scan")
def cmd_scan(json_output: bool = False, show_scores: bool = False) -> int:
def cmd_scan( # noqa: PLR0913
json_output: bool = False,
show_scores: bool = False,
depth: int | None = None,
ignore_patterns: list[str] | None = None,
respect_gitignore: bool = False,
show_files_sample: int | None = None,
) -> int:
"""Handle the scan command."""
if ignore_patterns is None:
ignore_patterns = []

if json_output:
# Use new weighted evidence collection for JSON output
try:
evidence = collect_evidence(Path("."))
evidence = collect_evidence(
Path("."),
depth=depth,
ignore_patterns=ignore_patterns,
respect_gitignore=respect_gitignore,
show_files_sample=show_files_sample,
)
detected_languages = sorted(evidence.keys())
except (OSError, PermissionError):
# Handle I/O errors gracefully for JSON output - return empty results
Expand All @@ -516,31 +553,48 @@ def cmd_scan(json_output: bool = False, show_scores: bool = False) -> int:
print(json.dumps(json_result, indent=2))
return 0
else:
# Use legacy text output
detected = detect_languages(".")
# Use enhanced evidence collection for text output too
try:
evidence = collect_evidence(
Path("."),
depth=depth,
ignore_patterns=ignore_patterns,
respect_gitignore=respect_gitignore,
)
except (OSError, PermissionError):
print("No known languages detected.")
return 0

if not detected:
if not evidence:
print("No known languages detected.")
return 0

# Extract language names for header
languages = [lang for lang, _ in detected]
# Extract language names for header (sorted)
languages = sorted(evidence.keys())
print(f"Detected: {', '.join(languages)}")

# Print details for each language
for lang, reasons in detected:
reasons_str = ", ".join(reasons)
for lang in languages:
lang_data = evidence[lang]
reasons = lang_data.get("reasons", [])

# Extract unique patterns for display (with type check)
if isinstance(reasons, list):
patterns = list(
dict.fromkeys(
reason["pattern"]
for reason in reasons
if isinstance(reason, dict)
)
)
reasons_str = ", ".join(patterns)
else:
reasons_str = "unknown"
print(f"- {lang} -> {reasons_str}")

# Add score if --show-scores is enabled
if show_scores:
try:
evidence = collect_evidence(Path("."))
if lang in evidence:
print(f" Score: {evidence[lang]['score']}")
except (OSError, PermissionError):
# Skip scores if evidence collection fails
pass
print(f" Score: {lang_data['score']}")

return 0

Expand Down Expand Up @@ -1911,9 +1965,21 @@ def _dispatch_scan_command(args) -> int:
# Load settings and apply plugins before any rule usage
settings = _get_project_settings(args)
_apply_plugins_env(settings)

# Determine show_files_sample value
show_value = getattr(args, "show", None)
json_output = getattr(args, "json", False)
show_files_sample = (
show_value if show_value is not None else (5 if json_output else None)
)

return cmd_scan(
json_output=getattr(args, "json", False),
json_output=json_output,
show_scores=getattr(args, "show_scores", False),
depth=getattr(args, "depth", None),
ignore_patterns=getattr(args, "ignore", []),
respect_gitignore=getattr(args, "respect_gitignore", False),
show_files_sample=show_files_sample,
)


Expand Down
Loading
Loading