Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion review_roadmap/agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,53 @@ def _build_fetched_content_str(fetched_content: Dict[str, str]) -> str:
return "".join(parts)


# Maximum characters per diff before truncation
MAX_DIFF_CHARS = 1500
# Maximum total characters for all diffs combined
MAX_TOTAL_DIFF_CHARS = 80000


def _build_diffs_context(state: ReviewState) -> str:
    """Build formatted diff content for all changed files.

    Each file contributes a small Markdown section with its unified diff,
    so the LLM sees the actual code changes rather than just file names.
    Oversized diffs are clipped per-file, and the whole listing stops once
    the combined output would exceed the overall character budget.

    Args:
        state: Current workflow state containing PR context.

    Returns:
        Formatted string with all diffs, suitable for LLM prompt.
    """
    changed_files = state.pr_context.files
    if not changed_files:
        return "No files changed."

    sections = []
    budget_used = 0

    for changed in changed_files:
        header = (
            f"### {changed.path} ({changed.status}, "
            f"+{changed.additions}/-{changed.deletions})\n"
        )
        raw_diff = changed.diff_content
        if raw_diff:
            body = raw_diff
            if len(body) > MAX_DIFF_CHARS:
                # Clip the diff but record its true size for the reader.
                body = (
                    body[:MAX_DIFF_CHARS]
                    + f"\n... (truncated, {len(raw_diff)} chars total)"
                )
            section = header + f"```diff\n{body}\n```\n"
        else:
            # Binary files or very large files may not have diff content
            section = header + "[No diff available - binary or large file]\n"

        # Stop before blowing past the total budget; note how many files
        # were left out so the LLM knows the listing is partial.
        if budget_used + len(section) > MAX_TOTAL_DIFF_CHARS:
            skipped = len(changed_files) - len(sections)
            sections.append(
                f"\n... ({skipped} more files not shown due to size limits)\n"
            )
            break

        sections.append(section)
        budget_used += len(section)

    return "\n".join(sections)


def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
"""Generate the final Markdown review roadmap.

Expand All @@ -332,6 +379,7 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
iteration=state.reflection_iterations)

files_context = _build_files_context(state)
diffs_context = _build_diffs_context(state)
comments_context = _build_comments_context(state)
fetched_context_str = _build_fetched_content_str(state.fetched_content)

Expand All @@ -357,9 +405,12 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
Topology Analysis:
{state.topology.get('analysis', 'No analysis')}

Files (with base links):
Files (with deep links for review):
{chr(10).join(files_context)}

## File Diffs (actual code changes)
{diffs_context}

Existing Comments:
{chr(10).join(comments_context) if comments_context else "No comments found."}
{fetched_context_str}
Expand Down
25 changes: 20 additions & 5 deletions review_roadmap/agent/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,27 @@
You have the PR metadata, file list, and topology.
Review the "High Risk" files or ambiguous changes.

If you need to see the FULL content of a file (not just the diff) to understand it, use the `read_file` tool.
For example:
- If a class hierarchy changed, read the parent class file.
- If a complex logic change involves helper functions, read the file definition.
## The `read_file` Tool

Do not fetch files unless necessary. If the diff is sufficient, just return "DONE".
You can use `read_file` to fetch the FULL content of ANY file in the repository - not just files in the PR diff. This is useful for:

1. **Verifying imports exist**: If the PR imports from modules not in the diff (e.g., `from myproject.utils import helper`), fetch the file to confirm it exists and understand its interface.

2. **Understanding parent classes**: If a class inherits from a base class not in the diff, fetch it to understand the contract.

3. **Checking helper functions**: If the PR calls functions defined elsewhere, fetch them to understand the context.

4. **Validating configuration references**: If code references config files or constants, fetch them.

## Examples
- PR imports `from temperature_agent.tools.memory import store_house_knowledge` → fetch `temperature_agent/tools/memory.py`
- PR extends `BaseProcessor` from `core/processors.py` → fetch `core/processors.py`
- PR uses `settings.API_KEY` → fetch `config/settings.py`

## Guidelines
- Do not fetch files unless necessary. If the diff is self-explanatory, just return "DONE".
- Prioritize fetching files that help verify the PR is complete (e.g., are all required interfaces implemented?).
- If a file doesn't exist, that's valuable information - it suggests missing files in the PR.
"""

DRAFT_ROADMAP_SYSTEM_PROMPT = """You are a benevolent Senior Staff Engineer guiding a junior reviewer.
Expand Down
17 changes: 14 additions & 3 deletions review_roadmap/agent/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,20 @@
@tool
def read_file(path: str) -> Optional[str]:
    """
    Reads the full content of ANY file from the repository at the PR's head commit.

    This fetches files from the entire codebase, not just files in the PR diff.
    Use this to:
    - Verify that imported modules exist (e.g., fetch 'myproject/utils/helpers.py')
    - Understand parent classes or interfaces referenced by the PR
    - Check helper functions or utilities called by changed code
    - Confirm configuration or constant values referenced in the PR

    Args:
        path: Full path to the file from the repository root (e.g., 'src/utils/auth.py')

    Returns:
        The file content, or an error message if the file doesn't exist.
    """
    # Stub for schema binding only: the @tool decorator exposes this signature
    # and docstring to the LLM as the tool description, while the actual file
    # fetch is performed in the node that handles tool calls. Invoking this
    # function directly therefore just returns None.
    return None
45 changes: 34 additions & 11 deletions review_roadmap/github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def _fetch_pr_metadata(self, owner: str, repo: str, pr_number: int) -> PRMetadat
def _fetch_file_diffs(self, owner: str, repo: str, pr_number: int) -> List[FileDiff]:
"""Fetch the list of changed files with their diffs.

Handles pagination to fetch all files, not just the first page.
GitHub defaults to 30 files per page; we request 100 (max allowed).

Args:
owner: Repository owner.
repo: Repository name.
Expand All @@ -95,19 +98,39 @@ def _fetch_file_diffs(self, owner: str, repo: str, pr_number: int) -> List[FileD
Raises:
httpx.HTTPStatusError: If the API request fails.
"""
files_resp = self.client.get(f"/repos/{owner}/{repo}/pulls/{pr_number}/files")
files_resp.raise_for_status()
all_files: List[FileDiff] = []
page = 1
per_page = 100 # Max allowed by GitHub API

return [
FileDiff(
path=f["filename"],
status=f["status"],
additions=f["additions"],
deletions=f["deletions"],
diff_content=f.get("patch", "") # Patch might be missing for binary/large files
while True:
files_resp = self.client.get(
f"/repos/{owner}/{repo}/pulls/{pr_number}/files",
params={"page": page, "per_page": per_page}
)
for f in files_resp.json()
]
files_resp.raise_for_status()
files_data = files_resp.json()

if not files_data:
break # No more files

all_files.extend([
FileDiff(
path=f["filename"],
status=f["status"],
additions=f["additions"],
deletions=f["deletions"],
diff_content=f.get("patch", "") # Patch might be missing for binary/large files
)
for f in files_data
])

# If we got fewer than per_page, we've reached the last page
if len(files_data) < per_page:
break

page += 1

return all_files

def _fetch_issue_comments(self, owner: str, repo: str, pr_number: int) -> List[PRComment]:
"""Fetch general conversation comments from the issues endpoint.
Expand Down