Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion review_roadmap/agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,53 @@ def _build_fetched_content_str(fetched_content: Dict[str, str]) -> str:
return "".join(parts)


# Maximum characters per diff before truncation
MAX_DIFF_CHARS = 1500
# Maximum total characters for all diffs combined
MAX_TOTAL_DIFF_CHARS = 80000


def _build_diffs_context(state: ReviewState) -> str:
    """Build formatted diff content for all changed files.

    Each file contributes a small Markdown section with its unified diff,
    so the LLM sees the actual code changes rather than just file names.
    Oversized diffs are clipped per-file, and the whole listing stops once
    the combined output would exceed the overall character budget.

    Args:
        state: Current workflow state containing PR context.

    Returns:
        Formatted string with all diffs, suitable for LLM prompt.
    """
    changed_files = state.pr_context.files
    if not changed_files:
        return "No files changed."

    sections = []
    budget_used = 0

    for changed in changed_files:
        header = (
            f"### {changed.path} ({changed.status}, "
            f"+{changed.additions}/-{changed.deletions})\n"
        )
        raw_diff = changed.diff_content
        if raw_diff:
            body = raw_diff
            if len(body) > MAX_DIFF_CHARS:
                # Clip the diff but record its true size for the reader.
                body = (
                    body[:MAX_DIFF_CHARS]
                    + f"\n... (truncated, {len(raw_diff)} chars total)"
                )
            section = header + f"```diff\n{body}\n```\n"
        else:
            # Binary files or very large files may not have diff content
            section = header + "[No diff available - binary or large file]\n"

        # Stop before blowing past the total budget; note how many files
        # were left out so the LLM knows the listing is partial.
        if budget_used + len(section) > MAX_TOTAL_DIFF_CHARS:
            skipped = len(changed_files) - len(sections)
            sections.append(
                f"\n... ({skipped} more files not shown due to size limits)\n"
            )
            break

        sections.append(section)
        budget_used += len(section)

    return "\n".join(sections)


def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
"""Generate the final Markdown review roadmap.

Expand All @@ -332,6 +379,7 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
iteration=state.reflection_iterations)

files_context = _build_files_context(state)
diffs_context = _build_diffs_context(state)
comments_context = _build_comments_context(state)
fetched_context_str = _build_fetched_content_str(state.fetched_content)

Expand All @@ -357,9 +405,12 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
Topology Analysis:
{state.topology.get('analysis', 'No analysis')}

Files (with base links):
Files (with deep links for review):
{chr(10).join(files_context)}

## File Diffs (actual code changes)
{diffs_context}

Existing Comments:
{chr(10).join(comments_context) if comments_context else "No comments found."}
{fetched_context_str}
Expand Down
25 changes: 20 additions & 5 deletions review_roadmap/agent/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,27 @@
You have the PR metadata, file list, and topology.
Review the "High Risk" files or ambiguous changes.

If you need to see the FULL content of a file (not just the diff) to understand it, use the `read_file` tool.
For example:
- If a class hierarchy changed, read the parent class file.
- If a complex logic change involves helper functions, read the file definition.
## The `read_file` Tool

Do not fetch files unless necessary. If the diff is sufficient, just return "DONE".
You can use `read_file` to fetch the FULL content of ANY file in the repository - not just files in the PR diff. This is useful for:

1. **Verifying imports exist**: If the PR imports from modules not in the diff (e.g., `from myproject.utils import helper`), fetch the file to confirm it exists and understand its interface.

2. **Understanding parent classes**: If a class inherits from a base class not in the diff, fetch it to understand the contract.

3. **Checking helper functions**: If the PR calls functions defined elsewhere, fetch them to understand the context.

4. **Validating configuration references**: If code references config files or constants, fetch them.

## Examples
- PR imports `from temperature_agent.tools.memory import store_house_knowledge` → fetch `temperature_agent/tools/memory.py`
- PR extends `BaseProcessor` from `core/processors.py` → fetch `core/processors.py`
- PR uses `settings.API_KEY` → fetch `config/settings.py`

## Guidelines
- Do not fetch files unless necessary. If the diff is self-explanatory, just return "DONE".
- Prioritize fetching files that help verify the PR is complete (e.g., are all required interfaces implemented?).
- If a file doesn't exist, that's valuable information - it suggests missing files in the PR.
"""

DRAFT_ROADMAP_SYSTEM_PROMPT = """You are a benevolent Senior Staff Engineer guiding a junior reviewer.
Expand Down
17 changes: 14 additions & 3 deletions review_roadmap/agent/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,20 @@
@tool
def read_file(path: str) -> Optional[str]:
    """
    Reads the full content of ANY file from the repository at the PR's head commit.

    This fetches files from the entire codebase, not just files in the PR diff.
    Use this to:
    - Verify that imported modules exist (e.g., fetch 'myproject/utils/helpers.py')
    - Understand parent classes or interfaces referenced by the PR
    - Check helper functions or utilities called by changed code
    - Confirm configuration or constant values referenced in the PR

    Args:
        path: Full path to the file from the repository root (e.g., 'src/utils/auth.py')

    Returns:
        The file content, or an error message if the file doesn't exist.
    """
    # Stub for schema binding only: the @tool decorator exposes this signature
    # and docstring to the LLM as the tool description, while the actual file
    # fetch is performed in the node that handles tool calls. Invoking this
    # function directly therefore just returns None.
    return None
45 changes: 34 additions & 11 deletions review_roadmap/github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def _fetch_pr_metadata(self, owner: str, repo: str, pr_number: int) -> PRMetadat
def _fetch_file_diffs(self, owner: str, repo: str, pr_number: int) -> List[FileDiff]:
"""Fetch the list of changed files with their diffs.

Handles pagination to fetch all files, not just the first page.
GitHub defaults to 30 files per page; we request 100 (max allowed).

Args:
owner: Repository owner.
repo: Repository name.
Expand All @@ -95,19 +98,39 @@ def _fetch_file_diffs(self, owner: str, repo: str, pr_number: int) -> List[FileD
Raises:
httpx.HTTPStatusError: If the API request fails.
"""
files_resp = self.client.get(f"/repos/{owner}/{repo}/pulls/{pr_number}/files")
files_resp.raise_for_status()
all_files: List[FileDiff] = []
page = 1
per_page = 100 # Max allowed by GitHub API

return [
FileDiff(
path=f["filename"],
status=f["status"],
additions=f["additions"],
deletions=f["deletions"],
diff_content=f.get("patch", "") # Patch might be missing for binary/large files
while True:
files_resp = self.client.get(
f"/repos/{owner}/{repo}/pulls/{pr_number}/files",
params={"page": page, "per_page": per_page}
)
for f in files_resp.json()
]
files_resp.raise_for_status()
files_data = files_resp.json()

if not files_data:
break # No more files

all_files.extend([
FileDiff(
path=f["filename"],
status=f["status"],
additions=f["additions"],
deletions=f["deletions"],
diff_content=f.get("patch", "") # Patch might be missing for binary/large files
)
for f in files_data
])

# If we got fewer than per_page, we've reached the last page
if len(files_data) < per_page:
break

page += 1

return all_files

def _fetch_issue_comments(self, owner: str, repo: str, pr_number: int) -> List[PRComment]:
"""Fetch general conversation comments from the issues endpoint.
Expand Down