-
Notifications
You must be signed in to change notification settings - Fork 72
Fix .gitignore precedence and pattern handling (fixes #144) #149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,12 +23,93 @@ def __init__(self, repo_path: str) -> None: | |
| self._file_tree: Optional[List[Dict[str, Any]]] = None | ||
| self._gitignore_spec = self._load_gitignore() | ||
|
|
||
| def _adjust_gitignore_pattern(self, pattern: str, rel_base: Path) -> str: | ||
| """Adjust a gitignore pattern to be relative to the repository root. | ||
|
|
||
| Args: | ||
| pattern: The pattern from a .gitignore file (already stripped, negation removed) | ||
| rel_base: Relative path from repo root to the .gitignore directory | ||
|
|
||
| Returns: | ||
| The adjusted pattern prefixed with the correct path | ||
| """ | ||
| if str(rel_base) == ".": | ||
| # Pattern is in root .gitignore - use as-is | ||
| return pattern | ||
|
|
||
| # Pattern is in subdirectory | ||
| if pattern.startswith("/"): | ||
| # Absolute pattern (relative to gitignore dir) - make relative to repo root | ||
| return f"{rel_base}/{pattern[1:]}" | ||
| else: | ||
| # Relative pattern - applies to directory and all subdirectories | ||
| # Use /** to match files at any depth under the directory | ||
| return f"{rel_base}/**/{pattern}" | ||
|
Comment on lines
+44
to
+47
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pattern adjustment logic incorrectly applies recursive matching ( View Details📝 Patch Detailsdiff --git a/src/kit/code_searcher.py b/src/kit/code_searcher.py
index 0c7e5f2..806c62d 100644
--- a/src/kit/code_searcher.py
+++ b/src/kit/code_searcher.py
@@ -54,8 +54,14 @@ class CodeSearcher:
if pattern.startswith("/"):
# Absolute pattern (relative to gitignore dir) - make relative to repo root
return f"{rel_base}/{pattern[1:]}"
+ elif "/" in pattern:
+ # Pattern with internal slash - keep relative to gitignore dir
+ # According to gitignore spec: "If there is a separator at the beginning
+ # or middle of the pattern, then the pattern is relative to the directory
+ # level of the .gitignore file itself"
+ return f"{rel_base}/{pattern}"
else:
- # Relative pattern - applies to directory and all subdirectories
+ # Pattern without slash - applies to directory and all subdirectories
# Use /** to match files at any depth under the directory
return f"{rel_base}/**/{pattern}"
diff --git a/src/kit/repo_mapper.py b/src/kit/repo_mapper.py
index 62ae31d..a011eb3 100644
--- a/src/kit/repo_mapper.py
+++ b/src/kit/repo_mapper.py
@@ -41,8 +41,14 @@ class RepoMapper:
if pattern.startswith("/"):
# Absolute pattern (relative to gitignore dir) - make relative to repo root
return f"{rel_base}/{pattern[1:]}"
+ elif "/" in pattern:
+ # Pattern with internal slash - keep relative to gitignore dir
+ # According to gitignore spec: "If there is a separator at the beginning
+ # or middle of the pattern, then the pattern is relative to the directory
+ # level of the .gitignore file itself"
+ return f"{rel_base}/{pattern}"
else:
- # Relative pattern - applies to directory and all subdirectories
+ # Pattern without slash - applies to directory and all subdirectories
# Use /** to match files at any depth under the directory
return f"{rel_base}/**/{pattern}"
AnalysisIncorrect recursive matching for patterns with internal slashes in subdirectory .gitignore filesWhat fails: The How to reproduce:
Expected behavior: According to git documentation on gitignore patterns: "If there is a separator at the beginning or middle of the pattern, then the pattern is relative to the directory level of the .gitignore file itself." Patterns with internal slashes should NOT be treated as wildcard patterns that match at any depth. Root cause: The code had three cases but was missing the distinction for patterns with internal slashes:
The fix adds explicit handling for patterns containing internal slashes by prepending the relative base path without |
||
|
|
||
| def _load_gitignore(self): | ||
| gitignore_path = self.repo_path / ".gitignore" | ||
| if gitignore_path.exists(): | ||
| with open(gitignore_path) as f: | ||
| return pathspec.PathSpec.from_lines("gitwildmatch", f) | ||
| return None | ||
| """Load all .gitignore files in repository tree and merge them. | ||
|
|
||
| Returns a PathSpec that respects all .gitignore files, with proper | ||
| precedence (patterns from deeper directories can override root patterns). | ||
| """ | ||
| gitignore_files = [] | ||
|
|
||
| # Collect all .gitignore files | ||
| for dirpath, dirnames, filenames in os.walk(self.repo_path): | ||
| if ".git" in Path(dirpath).parts: | ||
| continue | ||
| if ".gitignore" in filenames: | ||
| gitignore_files.append(Path(dirpath) / ".gitignore") | ||
|
|
||
| if not gitignore_files: | ||
| return None | ||
|
|
||
| # Sort by depth (shallowest first) for correct precedence | ||
| # Git processes .gitignore files from root to leaf, so later patterns can override earlier ones | ||
| gitignore_files.sort(key=lambda p: len(p.parts)) | ||
|
|
||
| # Collect all patterns with proper path prefixes | ||
| all_patterns = [] | ||
| for gitignore_path in gitignore_files: | ||
| try: | ||
| with open(gitignore_path, "r", encoding="utf-8") as f: | ||
| patterns = f.readlines() | ||
|
|
||
| # Calculate relative base path from repo root | ||
| try: | ||
| rel_base = gitignore_path.parent.relative_to(self.repo_path) | ||
| except ValueError: | ||
| continue # gitignore outside repo (shouldn't happen) | ||
|
|
||
| # Process each pattern | ||
| for pattern in patterns: | ||
| pattern = pattern.strip() | ||
| if not pattern or pattern.startswith("#"): | ||
| continue | ||
|
|
||
| # Handle negation patterns | ||
| is_negation = pattern.startswith("!") | ||
| if is_negation: | ||
| pattern = pattern[1:] | ||
|
|
||
| # Adjust pattern to be relative to repo root | ||
| adjusted = self._adjust_gitignore_pattern(pattern, rel_base) | ||
|
|
||
| # Re-add negation prefix if needed | ||
| if is_negation: | ||
| adjusted = f"!{adjusted}" | ||
|
|
||
| all_patterns.append(adjusted) | ||
|
|
||
| except Exception as e: | ||
| logging.warning(f"Could not load {gitignore_path}: {e}") | ||
| continue | ||
|
|
||
| if not all_patterns: | ||
| return None | ||
|
|
||
| # Create single merged pathspec | ||
| return pathspec.PathSpec.from_lines("gitwildmatch", all_patterns) | ||
|
|
||
| def _should_ignore(self, file: Path) -> bool: | ||
| # Handle potential symlink resolution mismatches | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| import pytest | ||
| from pathlib import Path | ||
| from kit.repo_mapper import RepoMapper | ||
| import subprocess | ||
|
|
||
|
|
||
| @pytest.mark.integration | ||
| @pytest.mark.skipif( | ||
| not Path("/home/selman/dev/humanlayer").exists(), | ||
| reason="Requires humanlayer repository" | ||
| ) | ||
| def test_humanlayer_repo_gitignore(): | ||
| """Integration test: Verify fix works on actual humanlayer repo.""" | ||
|
|
||
| # Get git's file count | ||
| result = subprocess.run( | ||
| ["git", "ls-files"], | ||
| cwd="/home/selman/dev/humanlayer", | ||
| capture_output=True, | ||
| text=True | ||
| ) | ||
| git_files = set(result.stdout.strip().split("\n")) | ||
| git_count = len(git_files) | ||
|
|
||
| # Get kit's file count | ||
| mapper = RepoMapper("/home/selman/dev/humanlayer") | ||
| tree = mapper.get_file_tree() | ||
| kit_count = len(tree) | ||
| kit_paths = {item["path"] for item in tree} | ||
|
|
||
| # Should be approximately equal (within 10% tolerance for build artifacts) | ||
| tolerance = 0.1 | ||
| assert abs(kit_count - git_count) / git_count < tolerance, \ | ||
| f"Kit returned {kit_count} files, Git tracks {git_count} files" | ||
|
|
||
| # Should be well under token limit (assuming ~100 chars per file path) | ||
| estimated_tokens = kit_count * 100 | ||
| assert estimated_tokens < 25000, \ | ||
| f"Estimated {estimated_tokens} tokens (exceeds 25k limit)" | ||
|
|
||
| # Verify no node_modules files included | ||
| node_modules_files = [p for p in kit_paths if "node_modules" in p] | ||
| assert len(node_modules_files) == 0, \ | ||
| f"Found {len(node_modules_files)} node_modules files (should be 0)" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same pattern adjustment logic error as in repo_mapper.py - patterns with internal forward slashes are incorrectly treated as matching at any depth.
View Details
📝 Patch Details
Analysis
Gitignore pattern adjustment logic error - patterns with internal slashes incorrectly treated as recursive
What fails: The
_adjust_gitignore_pattern()method in bothCodeSearcher(line 54-60) andRepoMapper(line 41-48) incorrectly adds recursive**/matching to all patterns without a slash, including patterns that contain internal slashes. This causes patterns likesrc/config.jsfrom a subdirectory.gitignoreto match files at ANY depth under that directory, rather than only at the exact relative path.How to reproduce:
Create a repository structure:
Run CodeSearcher or RepoMapper on this repository
Check which files are ignored via
_should_ignore()methodResult: Both
subdir/src/config.jsandsubdir/anything/src/config.jsare marked as ignoredExpected: Only
subdir/src/config.jsshould be ignored. The patternsrc/config.jsinsubdir/.gitignoremeans "match the file at the pathsrc/config.jsrelative to the subdirectory", not "matchsrc/config.jsat any depth". This matches Git's actual behavior per gitignore documentation - a pattern containing a slash is treated as a glob pattern relative to the directory containing the .gitignore file.Fix: The
_adjust_gitignore_pattern()method now checks for internal slashes before adding**/:/are anchored to the gitignore directory/are matched exactly relative to the gitignore directory**/to match at any depth