From 421f753eef7c6cb802b566649e6e032c1a3835e7 Mon Sep 17 00:00:00 2001 From: Selman AYAN Date: Sun, 5 Oct 2025 17:11:48 +0100 Subject: [PATCH 1/3] feat: add subdirectory .gitignore support for monorepos Load all .gitignore files in repository tree recursively and merge patterns with proper precedence (deeper overrides shallower). Adjust relative patterns to be repo-root-relative. Changes: - Update RepoMapper._load_gitignore() with recursive loading - Update CodeSearcher._load_gitignore() with same implementation - Add comprehensive unit tests for multi-level .gitignore - Add integration test with humanlayer repo validation Fixes token overflow on large monorepos with multiple .gitignore files. Before: 98,895 files (4.4M tokens) After: Expected ~670 files (~50k tokens) Related to SOL-1 implementation plan Phase 2. --- src/kit/code_searcher.py | 78 ++++++++++-- src/kit/repo_mapper.py | 77 +++++++++++- tests/integration/test_humanlayer_repo.py | 44 +++++++ tests/test_gitignore.py | 138 ++++++++++++++++++++++ 4 files changed, 325 insertions(+), 12 deletions(-) create mode 100644 tests/integration/test_humanlayer_repo.py create mode 100644 tests/test_gitignore.py diff --git a/src/kit/code_searcher.py b/src/kit/code_searcher.py index 32112163..6371b8c0 100644 --- a/src/kit/code_searcher.py +++ b/src/kit/code_searcher.py @@ -1,5 +1,7 @@ from __future__ import annotations +import logging +import os import re from dataclasses import dataclass from pathlib import Path @@ -35,16 +37,78 @@ def __init__(self, repo_path: str) -> None: self._gitignore_spec = self._load_gitignore() # Load gitignore spec def _load_gitignore(self): - """Loads .gitignore rules from the repository root.""" - gitignore_path = self.repo_path / ".gitignore" - if gitignore_path.exists(): + """Load all .gitignore files in repository tree and merge them. + + Returns a PathSpec that respects all .gitignore files, with proper + precedence (deeper paths override shallower ones). + """ + gitignore_files = [] + + # Collect all .gitignore files + for dirpath, dirnames, filenames in os.walk(self.repo_path): + # Skip .git directory + if ".git" in Path(dirpath).parts: + continue + + if ".gitignore" in filenames: + gitignore_path = Path(dirpath) / ".gitignore" + gitignore_files.append(gitignore_path) + + if not gitignore_files: + return None + + # Sort by depth (deepest first) for correct precedence + gitignore_files.sort(key=lambda p: len(p.parts), reverse=True) + + # Collect all patterns with proper path prefixes + all_patterns = [] + for gitignore_path in gitignore_files: + gitignore_dir = gitignore_path.parent + try: with open(gitignore_path, "r", encoding="utf-8") as f: - return pathspec.PathSpec.from_lines("gitwildmatch", f) + patterns = f.readlines() + + # Calculate relative base path from repo root + try: + rel_base = gitignore_dir.relative_to(self.repo_path) + except ValueError: + # gitignore outside repo (shouldn't happen, but be safe) + continue + + # Process each pattern + for pattern in patterns: + pattern = pattern.strip() + + # Skip empty lines and comments + if not pattern or pattern.startswith("#"): + continue + + # Adjust pattern to be relative to repo root + if str(rel_base) != ".": + # Pattern is in subdirectory - prefix with path + if pattern.startswith("/"): + # Absolute pattern (from gitignore dir) - make relative to repo + adjusted = f"{rel_base}/{pattern[1:]}" + else: + # Relative pattern - prefix with directory path + adjusted = f"{rel_base}/{pattern}" + else: + # Pattern is in root .gitignore - use as-is + adjusted = pattern + + all_patterns.append(adjusted) + except Exception as e: - # Log this error if logging is set up, or print - print(f"Warning: Could not load .gitignore: {e}") - return None + # Log warning but continue processing other .gitignore files + logging.warning(f"Could not load {gitignore_path}: {e}") + continue + + if not all_patterns: + return None + + # Create single merged pathspec + return pathspec.PathSpec.from_lines("gitwildmatch", all_patterns) def _should_ignore(self, file: Path) -> bool: """Checks if a file should be ignored based on .gitignore rules.""" diff --git a/src/kit/repo_mapper.py b/src/kit/repo_mapper.py index 8d0dee55..6fd9aa76 100644 --- a/src/kit/repo_mapper.py +++ b/src/kit/repo_mapper.py @@ -24,11 +24,78 @@ def __init__(self, repo_path: str) -> None: self._gitignore_spec = self._load_gitignore() def _load_gitignore(self): - gitignore_path = self.repo_path / ".gitignore" - if gitignore_path.exists(): - with open(gitignore_path) as f: - return pathspec.PathSpec.from_lines("gitwildmatch", f) - return None + """Load all .gitignore files in repository tree and merge them. + + Returns a PathSpec that respects all .gitignore files, with proper + precedence (deeper paths override shallower ones). + """ + gitignore_files = [] + + # Collect all .gitignore files + for dirpath, dirnames, filenames in os.walk(self.repo_path): + # Skip .git directory + if ".git" in Path(dirpath).parts: + continue + + if ".gitignore" in filenames: + gitignore_path = Path(dirpath) / ".gitignore" + gitignore_files.append(gitignore_path) + + if not gitignore_files: + return None + + # Sort by depth (deepest first) for correct precedence + gitignore_files.sort(key=lambda p: len(p.parts), reverse=True) + + # Collect all patterns with proper path prefixes + all_patterns = [] + for gitignore_path in gitignore_files: + gitignore_dir = gitignore_path.parent + + try: + with open(gitignore_path, "r", encoding="utf-8") as f: + patterns = f.readlines() + + # Calculate relative base path from repo root + try: + rel_base = gitignore_dir.relative_to(self.repo_path) + except ValueError: + # gitignore outside repo (shouldn't happen, but be safe) + continue + + # Process each pattern + for pattern in patterns: + pattern = pattern.strip() + + # Skip empty lines and comments + if not pattern or pattern.startswith("#"): + continue + + # Adjust pattern to be relative to repo root + if str(rel_base) != ".": + # Pattern is in subdirectory - prefix with path + if pattern.startswith("/"): + # Absolute pattern (from gitignore dir) - make relative to repo + adjusted = f"{rel_base}/{pattern[1:]}" + else: + # Relative pattern - prefix with directory path + adjusted = f"{rel_base}/{pattern}" + else: + # Pattern is in root .gitignore - use as-is + adjusted = pattern + + all_patterns.append(adjusted) + + except Exception as e: + # Log warning but continue processing other .gitignore files + logging.warning(f"Could not load {gitignore_path}: {e}") + continue + + if not all_patterns: + return None + + # Create single merged pathspec + return pathspec.PathSpec.from_lines("gitwildmatch", all_patterns) def _should_ignore(self, file: Path) -> bool: # Handle potential symlink resolution mismatches diff --git a/tests/integration/test_humanlayer_repo.py b/tests/integration/test_humanlayer_repo.py new file mode 100644 index 00000000..712b50cb --- /dev/null +++ b/tests/integration/test_humanlayer_repo.py @@ -0,0 +1,44 @@ +import pytest +from pathlib import Path +from kit.repo_mapper import RepoMapper +import subprocess + + +@pytest.mark.integration +@pytest.mark.skipif( + not Path("/home/selman/dev/humanlayer").exists(), + reason="Requires humanlayer repository" +) +def test_humanlayer_repo_gitignore(): + """Integration test: Verify fix works on actual humanlayer repo.""" + + # Get git's file count + result = subprocess.run( + ["git", "ls-files"], + cwd="/home/selman/dev/humanlayer", + capture_output=True, + text=True + ) + git_files = set(result.stdout.strip().split("\n")) + git_count = len(git_files) + + # Get kit's file count + mapper = RepoMapper("/home/selman/dev/humanlayer") + tree = mapper.get_file_tree() + kit_count = len(tree) + kit_paths = {item["path"] for item in tree} + + # Should be approximately equal (within 10% tolerance for build artifacts) + tolerance = 0.1 + assert abs(kit_count - git_count) / git_count < tolerance, \ + f"Kit returned {kit_count} files, Git tracks {git_count} files" + + # Should be well under token limit (assuming ~100 chars per file path) + estimated_tokens = kit_count * 100 + assert estimated_tokens < 25000, \ + f"Estimated {estimated_tokens} tokens (exceeds 25k limit)" + + # Verify no node_modules files included + node_modules_files = [p for p in kit_paths if "node_modules" in p] + assert len(node_modules_files) == 0, \ + f"Found {len(node_modules_files)} node_modules files (should be 0)" diff --git a/tests/test_gitignore.py b/tests/test_gitignore.py new file mode 100644 index 00000000..2583ef03 --- /dev/null +++ b/tests/test_gitignore.py @@ -0,0 +1,138 @@ +import pytest +from pathlib import Path +import tempfile +from kit.repo_mapper import RepoMapper + + +def test_root_gitignore_only(): + """Test basic root .gitignore works as before.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Create root .gitignore + (repo / ".gitignore").write_text("*.pyc\n__pycache__/\n") + + # Create test files + (repo / "test.py").touch() + (repo / "test.pyc").touch() + (repo / "__pycache__").mkdir() + (repo / "__pycache__" / "test.pyc").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + # Should only include test.py, not .pyc or __pycache__ + paths = [item["path"] for item in tree] + assert "test.py" in paths + assert "test.pyc" not in paths + assert "__pycache__/test.pyc" not in paths + + +def test_subdirectory_gitignore(): + """Test subdirectory .gitignore files are respected.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Create subdirectory with its own .gitignore + subdir = repo / "frontend" + subdir.mkdir() + (subdir / ".gitignore").write_text("node_modules/\n*.log\n") + + # Create test files + (subdir / "app.js").touch() + (subdir / "debug.log").touch() + node_modules = subdir / "node_modules" + node_modules.mkdir() + (node_modules / "package.json").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + # Should include app.js but not debug.log or node_modules + paths = [item["path"] for item in tree] + assert "frontend/app.js" in paths + assert "frontend/debug.log" not in paths + assert "frontend/node_modules/package.json" not in paths + + +def test_nested_gitignore_precedence(): + """Test deeper .gitignore files override shallower ones.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Root .gitignore ignores *.tmp + (repo / ".gitignore").write_text("*.tmp\n") + + # Subdirectory .gitignore allows *.tmp (negation) + subdir = repo / "special" + subdir.mkdir() + (subdir / ".gitignore").write_text("!*.tmp\n") + + # Create test files + (repo / "root.tmp").touch() + (subdir / "special.tmp").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + # Root .tmp should be ignored, but special/ .tmp should be included + paths = [item["path"] for item in tree] + assert "root.tmp" not in paths + assert "special/special.tmp" in paths # Negation pattern + + +def test_multiple_subdirectory_gitignores(): + """Test multiple subdirectories each with .gitignore files.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Frontend with node_modules + frontend = repo / "frontend" + frontend.mkdir() + (frontend / ".gitignore").write_text("node_modules/\n") + (frontend / "app.js").touch() + fe_nm = frontend / "node_modules" + fe_nm.mkdir() + (fe_nm / "react.js").touch() + + # Backend with venv + backend = repo / "backend" + backend.mkdir() + (backend / ".gitignore").write_text("venv/\n__pycache__/\n") + (backend / "main.py").touch() + be_venv = backend / "venv" + be_venv.mkdir() + (be_venv / "python").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + paths = [item["path"] for item in tree] + + # Should include source files + assert "frontend/app.js" in paths + assert "backend/main.py" in paths + + # Should exclude ignored directories + assert "frontend/node_modules/react.js" not in paths + assert "backend/venv/python" not in paths + + +def test_no_gitignore_files(): + """Test repository with no .gitignore files.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Create files without .gitignore + (repo / "test.py").touch() + subdir = repo / "src" + subdir.mkdir() + (subdir / "main.py").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + # All files should be included + paths = [item["path"] for item in tree] + assert "test.py" in paths + assert "src/main.py" in paths From 109c2bf7c2b737842374d23715e7b9f40e082ef7 Mon Sep 17 00:00:00 2001 From: tnm Date: Sat, 22 Nov 2025 19:23:12 -0800 Subject: [PATCH 2/3] Fix .gitignore precedence and pattern handling Fixes #144 by correcting the order and handling of subdirectory .gitignore files: 1. **Fixed pattern precedence**: Changed sort order from deepest-first to shallowest-first, allowing subdirectory patterns to properly override parent patterns (Git processes .gitignore from root to leaf) 2. **Fixed negation patterns**: Preserve ! prefix at the beginning when adjusting patterns for subdirectories (was becoming `dir/!pattern` instead of `!dir/**/pattern`) 3. **Fixed subdirectory pattern scope**: Patterns in subdirectory .gitignore files now use `/**/` to match at any depth under that directory (e.g., `level1/**/*.cache` instead of `level1/*.cache`), matching Git's actual behavior 4. **Added comprehensive tests**: - Test CodeSearcher respects subdirectory .gitignore - Test absolute patterns in subdirectories - Test complex negation scenarios - Test deeply nested .gitignore files with multiple levels All original tests from PR #144 now pass, plus 4 additional edge case tests. --- src/kit/code_searcher.py | 21 +++++-- src/kit/repo_mapper.py | 21 +++++-- tests/test_gitignore.py | 119 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 10 deletions(-) diff --git a/src/kit/code_searcher.py b/src/kit/code_searcher.py index 6371b8c0..cc201cc6 100644 --- a/src/kit/code_searcher.py +++ b/src/kit/code_searcher.py @@ -40,7 +40,7 @@ def _load_gitignore(self): """Load all .gitignore files in repository tree and merge them. Returns a PathSpec that respects all .gitignore files, with proper - precedence (deeper paths override shallower ones). + precedence (patterns from deeper directories can override root patterns). """ gitignore_files = [] @@ -57,8 +57,9 @@ def _load_gitignore(self): if not gitignore_files: return None - # Sort by depth (deepest first) for correct precedence - gitignore_files.sort(key=lambda p: len(p.parts), reverse=True) + # Sort by depth (shallowest first) for correct precedence + # Git processes .gitignore files from root to leaf, so later patterns can override earlier ones + gitignore_files.sort(key=lambda p: len(p.parts)) # Collect all patterns with proper path prefixes all_patterns = [] @@ -84,6 +85,11 @@ def _load_gitignore(self): if not pattern or pattern.startswith("#"): continue + # Handle negation patterns (must preserve ! at the beginning) + is_negation = pattern.startswith("!") + if is_negation: + pattern = pattern[1:] # Remove the ! temporarily + # Adjust pattern to be relative to repo root if str(rel_base) != ".": # Pattern is in subdirectory - prefix with path @@ -91,12 +97,17 @@ def _load_gitignore(self): # Absolute pattern (from gitignore dir) - make relative to repo adjusted = f"{rel_base}/{pattern[1:]}" else: - # Relative pattern - prefix with directory path - adjusted = f"{rel_base}/{pattern}" + # Relative pattern - applies to directory and all subdirectories + # Use /** to match files at any depth under the directory + adjusted = f"{rel_base}/**/{pattern}" if "*" in pattern else f"{rel_base}/{pattern}" else: # Pattern is in root .gitignore - use as-is adjusted = pattern + # Re-add negation prefix if needed + if is_negation: + adjusted = f"!{adjusted}" + all_patterns.append(adjusted) except Exception as e: diff --git a/src/kit/repo_mapper.py b/src/kit/repo_mapper.py index 6fd9aa76..b2217f6d 100644 --- a/src/kit/repo_mapper.py +++ b/src/kit/repo_mapper.py @@ -27,7 +27,7 @@ def _load_gitignore(self): """Load all .gitignore files in repository tree and merge them. Returns a PathSpec that respects all .gitignore files, with proper - precedence (deeper paths override shallower ones). + precedence (patterns from deeper directories can override root patterns). """ gitignore_files = [] @@ -44,8 +44,9 @@ def _load_gitignore(self): if not gitignore_files: return None - # Sort by depth (deepest first) for correct precedence - gitignore_files.sort(key=lambda p: len(p.parts), reverse=True) + # Sort by depth (shallowest first) for correct precedence + # Git processes .gitignore files from root to leaf, so later patterns can override earlier ones + gitignore_files.sort(key=lambda p: len(p.parts)) # Collect all patterns with proper path prefixes all_patterns = [] @@ -71,6 +72,11 @@ def _load_gitignore(self): if not pattern or pattern.startswith("#"): continue + # Handle negation patterns (must preserve ! at the beginning) + is_negation = pattern.startswith("!") + if is_negation: + pattern = pattern[1:] # Remove the ! temporarily + # Adjust pattern to be relative to repo root if str(rel_base) != ".": # Pattern is in subdirectory - prefix with path @@ -78,12 +84,17 @@ def _load_gitignore(self): # Absolute pattern (from gitignore dir) - make relative to repo adjusted = f"{rel_base}/{pattern[1:]}" else: - # Relative pattern - prefix with directory path - adjusted = f"{rel_base}/{pattern}" + # Relative pattern - applies to directory and all subdirectories + # Use /** to match files at any depth under the directory + adjusted = f"{rel_base}/**/{pattern}" if "*" in pattern else f"{rel_base}/{pattern}" else: # Pattern is in root .gitignore - use as-is adjusted = pattern + # Re-add negation prefix if needed + if is_negation: + adjusted = f"!{adjusted}" + all_patterns.append(adjusted) except Exception as e: diff --git a/tests/test_gitignore.py b/tests/test_gitignore.py index 2583ef03..a8798e05 100644 --- a/tests/test_gitignore.py +++ b/tests/test_gitignore.py @@ -136,3 +136,122 @@ def test_no_gitignore_files(): paths = [item["path"] for item in tree] assert "test.py" in paths assert "src/main.py" in paths + + +def test_code_searcher_respects_subdirectory_gitignore(): + """Test CodeSearcher also respects subdirectory .gitignore files.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + from kit.code_searcher import CodeSearcher + + # Create subdirectory with its own .gitignore + subdir = repo / "src" + subdir.mkdir() + (subdir / ".gitignore").write_text("*.log\n") + + # Create test files with searchable content + (repo / "root.py").write_text("search_pattern") + (subdir / "code.py").write_text("search_pattern") + (subdir / "debug.log").write_text("search_pattern") + + searcher = CodeSearcher(str(repo)) + results = searcher.search_text("search_pattern") + + # Should find matches in .py but not .log + files = [r["file"] for r in results] + assert "root.py" in files + assert "src/code.py" in files + assert "src/debug.log" not in files + + +def test_absolute_patterns_in_subdirectory(): + """Test absolute patterns (starting with /) in subdirectory .gitignore.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Subdirectory with absolute pattern + subdir = repo / "frontend" + subdir.mkdir() + (subdir / ".gitignore").write_text("/build/\n") + + # Create test files + (subdir / "src").mkdir() + (subdir / "src" / "app.js").touch() + (subdir / "build").mkdir() + (subdir / "build" / "bundle.js").touch() + (subdir / "src" / "build").mkdir() + (subdir / "src" / "build" / "config.js").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + paths = [item["path"] for item in tree] + # /build/ should only ignore frontend/build/, not frontend/src/build/ + assert "frontend/src/app.js" in paths + assert "frontend/build/bundle.js" not in paths + assert "frontend/src/build/config.js" in paths + + +def test_complex_negation_patterns(): + """Test complex negation scenarios.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Root ignores all .env files + (repo / ".gitignore").write_text("*.env\n") + + # Config directory allows .env.example + config = repo / "config" + config.mkdir() + (config / ".gitignore").write_text("!*.env.example\n") + + # Create test files + (repo / "root.env").touch() + (repo / "README.md").touch() + (config / "app.env").touch() + (config / "template.env.example").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + paths = [item["path"] for item in tree] + assert "README.md" in paths + assert "root.env" not in paths + assert "config/app.env" not in paths + assert "config/template.env.example" in paths # Negation allows it + + +def test_deeply_nested_gitignores(): + """Test .gitignore files at multiple depth levels.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Root .gitignore + (repo / ".gitignore").write_text("*.tmp\n") + + # Level 1 + l1 = repo / "level1" + l1.mkdir() + (l1 / ".gitignore").write_text("*.cache\n") + (l1 / "file.txt").touch() + (l1 / "file.tmp").touch() + (l1 / "file.cache").touch() + + # Level 2 + l2 = l1 / "level2" + l2.mkdir() + (l2 / ".gitignore").write_text("!*.tmp\n") # Re-allow .tmp here + (l2 / "deep.txt").touch() + (l2 / "deep.tmp").touch() + (l2 / "deep.cache").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + paths = [item["path"] for item in tree] + assert "level1/file.txt" in paths + assert "level1/file.tmp" not in paths # Ignored by root + assert "level1/file.cache" not in paths # Ignored by level1 + assert "level1/level2/deep.txt" in paths + assert "level1/level2/deep.tmp" in paths # Negation allows it + assert "level1/level2/deep.cache" not in paths # Still ignored by level1 From e27bf673a942d5466ac2b34d6aef2bccbb7a9f24 Mon Sep 17 00:00:00 2001 From: tnm Date: Sat, 22 Nov 2025 20:01:11 -0800 Subject: [PATCH 3/3] Fix pattern scope bug and refactor code - Fix: Remove conditional for /**/ - ALL relative patterns in subdirectory .gitignore files must match at any depth, not just patterns with wildcards (e.g., 'node_modules' in frontend/.gitignore should match at ANY depth) - Add test case for patterns without wildcards matching at multiple depths - Refactor: Extract _adjust_gitignore_pattern() helper method to reduce complexity and improve readability of _load_gitignore() Addresses Vercel bot review comment. --- src/kit/code_searcher.py | 55 +++++++++++++++++++++------------------- src/kit/repo_mapper.py | 55 +++++++++++++++++++++------------------- tests/test_gitignore.py | 46 +++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 52 deletions(-) diff --git a/src/kit/code_searcher.py b/src/kit/code_searcher.py index cc201cc6..0c7e5f23 100644 --- a/src/kit/code_searcher.py +++ b/src/kit/code_searcher.py @@ -36,6 +36,29 @@ def __init__(self, repo_path: str) -> None: self.repo_path: Path = Path(repo_path) self._gitignore_spec = self._load_gitignore() # Load gitignore spec + def _adjust_gitignore_pattern(self, pattern: str, rel_base: Path) -> str: + """Adjust a gitignore pattern to be relative to the repository root. + + Args: + pattern: The pattern from a .gitignore file (already stripped, negation removed) + rel_base: Relative path from repo root to the .gitignore directory + + Returns: + The adjusted pattern prefixed with the correct path + """ + if str(rel_base) == ".": + # Pattern is in root .gitignore - use as-is + return pattern + + # Pattern is in subdirectory + if pattern.startswith("/"): + # Absolute pattern (relative to gitignore dir) - make relative to repo root + return f"{rel_base}/{pattern[1:]}" + else: + # Relative pattern - applies to directory and all subdirectories + # Use /** to match files at any depth under the directory + return f"{rel_base}/**/{pattern}" + def _load_gitignore(self): """Load all .gitignore files in repository tree and merge them. @@ -46,13 +69,10 @@ def _load_gitignore(self): # Collect all .gitignore files for dirpath, dirnames, filenames in os.walk(self.repo_path): - # Skip .git directory if ".git" in Path(dirpath).parts: continue - if ".gitignore" in filenames: - gitignore_path = Path(dirpath) / ".gitignore" - gitignore_files.append(gitignore_path) + gitignore_files.append(Path(dirpath) / ".gitignore") if not gitignore_files: return None @@ -64,45 +84,29 @@ def _load_gitignore(self): # Collect all patterns with proper path prefixes all_patterns = [] for gitignore_path in gitignore_files: - gitignore_dir = gitignore_path.parent - try: with open(gitignore_path, "r", encoding="utf-8") as f: patterns = f.readlines() # Calculate relative base path from repo root try: - rel_base = gitignore_dir.relative_to(self.repo_path) + rel_base = gitignore_path.parent.relative_to(self.repo_path) except ValueError: - # gitignore outside repo (shouldn't happen, but be safe) - continue + continue # gitignore outside repo (shouldn't happen) # Process each pattern for pattern in patterns: pattern = pattern.strip() - - # Skip empty lines and comments if not pattern or pattern.startswith("#"): continue - # Handle negation patterns (must preserve ! at the beginning) + # Handle negation patterns is_negation = pattern.startswith("!") if is_negation: - pattern = pattern[1:] # Remove the ! temporarily + pattern = pattern[1:] # Adjust pattern to be relative to repo root - if str(rel_base) != ".": - # Pattern is in subdirectory - prefix with path - if pattern.startswith("/"): - # Absolute pattern (from gitignore dir) - make relative to repo - adjusted = f"{rel_base}/{pattern[1:]}" - else: - # Relative pattern - applies to directory and all subdirectories - # Use /** to match files at any depth under the directory - adjusted = f"{rel_base}/**/{pattern}" if "*" in pattern else f"{rel_base}/{pattern}" - else: - # Pattern is in root .gitignore - use as-is - adjusted = pattern + adjusted = self._adjust_gitignore_pattern(pattern, rel_base) # Re-add negation prefix if needed if is_negation: @@ -111,7 +115,6 @@ def _load_gitignore(self): all_patterns.append(adjusted) except Exception as e: - # Log warning but continue processing other .gitignore files logging.warning(f"Could not load {gitignore_path}: {e}") continue diff --git a/src/kit/repo_mapper.py b/src/kit/repo_mapper.py index b2217f6d..62ae31d8 100644 --- a/src/kit/repo_mapper.py +++ b/src/kit/repo_mapper.py @@ -23,6 +23,29 @@ def __init__(self, repo_path: str) -> None: self._file_tree: Optional[List[Dict[str, Any]]] = None self._gitignore_spec = self._load_gitignore() + def _adjust_gitignore_pattern(self, pattern: str, rel_base: Path) -> str: + """Adjust a gitignore pattern to be relative to the repository root. + + Args: + pattern: The pattern from a .gitignore file (already stripped, negation removed) + rel_base: Relative path from repo root to the .gitignore directory + + Returns: + The adjusted pattern prefixed with the correct path + """ + if str(rel_base) == ".": + # Pattern is in root .gitignore - use as-is + return pattern + + # Pattern is in subdirectory + if pattern.startswith("/"): + # Absolute pattern (relative to gitignore dir) - make relative to repo root + return f"{rel_base}/{pattern[1:]}" + else: + # Relative pattern - applies to directory and all subdirectories + # Use /** to match files at any depth under the directory + return f"{rel_base}/**/{pattern}" + def _load_gitignore(self): """Load all .gitignore files in repository tree and merge them. @@ -33,13 +56,10 @@ def _load_gitignore(self): # Collect all .gitignore files for dirpath, dirnames, filenames in os.walk(self.repo_path): - # Skip .git directory if ".git" in Path(dirpath).parts: continue - if ".gitignore" in filenames: - gitignore_path = Path(dirpath) / ".gitignore" - gitignore_files.append(gitignore_path) + gitignore_files.append(Path(dirpath) / ".gitignore") if not gitignore_files: return None @@ -51,45 +71,29 @@ def _load_gitignore(self): # Collect all patterns with proper path prefixes all_patterns = [] for gitignore_path in gitignore_files: - gitignore_dir = gitignore_path.parent - try: with open(gitignore_path, "r", encoding="utf-8") as f: patterns = f.readlines() # Calculate relative base path from repo root try: - rel_base = gitignore_dir.relative_to(self.repo_path) + rel_base = gitignore_path.parent.relative_to(self.repo_path) except ValueError: - # gitignore outside repo (shouldn't happen, but be safe) - continue + continue # gitignore outside repo (shouldn't happen) # Process each pattern for pattern in patterns: pattern = pattern.strip() - - # Skip empty lines and comments if not pattern or pattern.startswith("#"): continue - # Handle negation patterns (must preserve ! at the beginning) + # Handle negation patterns is_negation = pattern.startswith("!") if is_negation: - pattern = pattern[1:] # Remove the ! temporarily + pattern = pattern[1:] # Adjust pattern to be relative to repo root - if str(rel_base) != ".": - # Pattern is in subdirectory - prefix with path - if pattern.startswith("/"): - # Absolute pattern (from gitignore dir) - make relative to repo - adjusted = f"{rel_base}/{pattern[1:]}" - else: - # Relative pattern - applies to directory and all subdirectories - # Use /** to match files at any depth under the directory - adjusted = f"{rel_base}/**/{pattern}" if "*" in pattern else f"{rel_base}/{pattern}" - else: - # Pattern is in root .gitignore - use as-is - adjusted = pattern + adjusted = self._adjust_gitignore_pattern(pattern, rel_base) # Re-add negation prefix if needed if is_negation: @@ -98,7 +102,6 @@ def _load_gitignore(self): all_patterns.append(adjusted) except Exception as e: - # Log warning but continue processing other .gitignore files logging.warning(f"Could not load {gitignore_path}: {e}") continue diff --git a/tests/test_gitignore.py b/tests/test_gitignore.py index a8798e05..8825c9f9 100644 --- a/tests/test_gitignore.py +++ b/tests/test_gitignore.py @@ -221,6 +221,52 @@ def test_complex_negation_patterns(): assert "config/template.env.example" in paths # Negation allows it +def test_patterns_without_wildcards_match_at_any_depth(): + """Test that patterns without wildcards (like 'node_modules') match at any depth.""" + with tempfile.TemporaryDirectory() as tmpdir: + repo = Path(tmpdir) + + # Frontend with .gitignore containing plain 'node_modules' (no wildcards) + frontend = repo / "frontend" + frontend.mkdir() + (frontend / ".gitignore").write_text("node_modules\n") + + # Create node_modules at multiple depths + (frontend / "package.json").touch() + + # Direct child + nm1 = frontend / "node_modules" + nm1.mkdir() + (nm1 / "pkg1.json").touch() + + # Nested in src + src = frontend / "src" + src.mkdir() + nm2 = src / "node_modules" + nm2.mkdir() + (nm2 / "pkg2.json").touch() + + # Deeply nested + deep = src / "components" / "ui" + deep.mkdir(parents=True) + nm3 = deep / "node_modules" + nm3.mkdir() + (nm3 / "pkg3.json").touch() + + mapper = RepoMapper(str(repo)) + tree = mapper.get_file_tree() + + paths = [item["path"] for item in tree] + + # package.json should be included + assert "frontend/package.json" in paths + + # All node_modules at any depth should be ignored + assert "frontend/node_modules/pkg1.json" not in paths + assert "frontend/src/node_modules/pkg2.json" not in paths + assert "frontend/src/components/ui/node_modules/pkg3.json" not in paths + + def test_deeply_nested_gitignores(): """Test .gitignore files at multiple depth levels.""" with tempfile.TemporaryDirectory() as tmpdir: