Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ A CLI tool that uses LLMs (Claude) to generate a structured, human-friendly road
- **Topology Analysis**: Groups changed files into logical components (e.g., API, DB, Frontend).
- **Deep Linking**: Generates links to specific lines of code in the PR.
- **Review Guidance**: Suggests a logical order for reviewing files.
- **Self-Reflection**: Reviews its own output before presenting, catching issues and improving quality.
- **Integration**: Fetches PR metadata, diffs, and existing comments from GitHub.

## Installation
Expand Down Expand Up @@ -116,6 +117,7 @@ review_roadmap {PR link in the form owner/repo/pr_number or just a URL to the PR
|--------|-------------|
| `--output`, `-o` | Save the roadmap to a file instead of printing to stdout |
| `--post`, `-p` | Post the roadmap as a comment directly on the PR |
| `--no-reflection` | Skip the self-reflection step for faster results |

You can use both `-o` and `-p` together—the roadmap will be generated once and saved to both the file and the PR comment.

Expand All @@ -141,6 +143,12 @@ Generate and both save to file and post to PR:
review_roadmap https://github.com/llamastack/llama-stack/pull/3674 -o roadmap.md -p
```

Generate quickly without self-reflection (faster but may have lower quality):

```bash
review_roadmap https://github.com/llamastack/llama-stack/pull/3674 --no-reflection
```

## Development

```bash
Expand All @@ -159,5 +167,8 @@ pytest -v
The tool uses **LangGraph** to orchestrate the workflow:

1. **Analyze Structure**: LLM analyzes file paths to understand component groups.
2. **Context Expansion**: (Planned) Fetches additional file content if diffs are ambiguous.
2. **Context Expansion**: Fetches additional file content if diffs are ambiguous.
3. **Draft Roadmap**: Synthesizes metadata, diffs, and comments into a coherent guide.
4. **Self-Reflection**: Reviews the generated roadmap for completeness and accuracy, retrying if needed.

The self-reflection step implements the [self-review pattern](https://github.com/jeremyeder/reference/blob/main/docs/patterns/self-review-reflection.md), where the agent evaluates its own output before presenting it to users. This catches issues like missing files, generic advice, or unclear reasoning—improving quality without manual review.
76 changes: 73 additions & 3 deletions review_roadmap/agent/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,63 @@
from langgraph.graph.state import CompiledStateGraph

from review_roadmap.agent.state import ReviewState
from review_roadmap.agent.nodes import analyze_structure, context_expansion, draft_roadmap
from review_roadmap.agent.nodes import (
analyze_structure,
context_expansion,
draft_roadmap,
reflect_on_roadmap,
)
from review_roadmap.agent.prompts import MAX_REFLECTION_ITERATIONS
from review_roadmap.logging import get_logger

logger = get_logger(__name__)


def _should_reflect(state: ReviewState) -> str:
    """Route after draft_roadmap: run self-reflection unless disabled.

    Args:
        state: Current workflow state.

    Returns:
        'reflect' when the reflection node should run, 'end' to finish early.
    """
    if not state.skip_reflection:
        return "reflect"
    # User opted out (e.g. --no-reflection on the CLI); go straight to END.
    logger.info("reflection_skipped", reason="disabled by user")
    return "end"


def _after_reflection(state: ReviewState) -> str:
    """Route after reflect_on_roadmap: regenerate the draft or finish.

    Args:
        state: Current workflow state with reflection results.

    Returns:
        'retry' to loop back to draft_roadmap, 'end' to finish.
    """
    if not state.reflection_passed:
        if state.reflection_iterations < MAX_REFLECTION_ITERATIONS:
            return "retry"
        # Cap reached: accept the roadmap as-is rather than loop forever.
        logger.warning(
            "max_reflection_iterations_reached",
            iterations=state.reflection_iterations,
        )
    return "end"


def build_graph() -> CompiledStateGraph:
"""Build and compile the LangGraph workflow for review roadmap generation.

The workflow consists of three sequential nodes:
The workflow consists of four nodes with conditional routing:
1. analyze_structure: Groups changed files into logical components
2. context_expansion: Optionally fetches additional file content for context
3. draft_roadmap: Generates the final Markdown roadmap
4. reflect_on_roadmap: Self-reviews the roadmap and may trigger a retry

The reflection step can be skipped by setting skip_reflection=True in the
initial state. If reflection fails, the workflow loops back to draft_roadmap
with feedback, up to MAX_REFLECTION_ITERATIONS times.

Returns:
A compiled LangGraph that can be invoked with a ReviewState containing
Expand All @@ -27,18 +74,41 @@ def build_graph() -> CompiledStateGraph:
>>> graph = build_graph()
>>> result = graph.invoke({"pr_context": pr_context})
>>> roadmap = result["roadmap"]

# Skip reflection:
>>> result = graph.invoke({"pr_context": pr_context, "skip_reflection": True})
"""
workflow = StateGraph(ReviewState)

# Add Nodes
workflow.add_node("analyze_structure", analyze_structure)
workflow.add_node("context_expansion", context_expansion)
workflow.add_node("draft_roadmap", draft_roadmap)
workflow.add_node("reflect_on_roadmap", reflect_on_roadmap)

# Define Edges
workflow.set_entry_point("analyze_structure")
workflow.add_edge("analyze_structure", "context_expansion")
workflow.add_edge("context_expansion", "draft_roadmap")
workflow.add_edge("draft_roadmap", END)

# After draft_roadmap, decide whether to reflect or skip
workflow.add_conditional_edges(
"draft_roadmap",
_should_reflect,
{
"reflect": "reflect_on_roadmap",
"end": END,
}
)

# After reflection, decide whether to retry or finish
workflow.add_conditional_edges(
"reflect_on_roadmap",
_after_reflection,
{
"retry": "draft_roadmap",
"end": END,
}
)

return workflow.compile()
85 changes: 84 additions & 1 deletion review_roadmap/agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def _get_llm_instance() -> BaseChatModel:
ANALYZE_STRUCTURE_SYSTEM_PROMPT,
CONTEXT_EXPANSION_SYSTEM_PROMPT,
DRAFT_ROADMAP_SYSTEM_PROMPT,
REFLECT_ON_ROADMAP_SYSTEM_PROMPT,
)
from review_roadmap.agent.tools import read_file

Expand Down Expand Up @@ -318,13 +319,17 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
file analysis, topology, comments, fetched content) into a structured
roadmap with deep links to guide the reviewer.

If reflection feedback is present (from a previous iteration), it is
included in the prompt to guide improvements.

Args:
state: Current workflow state with all accumulated context.

Returns:
Dict with 'roadmap' key containing the Markdown roadmap string.
"""
logger.info("node_started", node="draft_roadmap")
logger.info("node_started", node="draft_roadmap",
iteration=state.reflection_iterations)

files_context = _build_files_context(state)
comments_context = _build_comments_context(state)
Expand All @@ -333,6 +338,15 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
repo_url = state.pr_context.metadata.repo_url
pr_number = state.pr_context.metadata.number

# Include reflection feedback if this is a retry
feedback_section = ""
if state.reflection_feedback:
feedback_section = f"""

## Self-Review Feedback (address these issues in your revision)
{state.reflection_feedback}
"""

context_str = f"""
Title: {state.pr_context.metadata.title}
Description: {state.pr_context.metadata.description}
Expand All @@ -349,6 +363,7 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
Existing Comments:
{chr(10).join(comments_context) if comments_context else "No comments found."}
{fetched_context_str}
{feedback_section}
"""

prompt = ChatPromptTemplate.from_messages([
Expand All @@ -360,3 +375,71 @@ def draft_roadmap(state: ReviewState) -> Dict[str, Any]:
response = chain.invoke({"context": context_str})

return {"roadmap": response.content}


def reflect_on_roadmap(state: ReviewState) -> Dict[str, Any]:
    """Self-review the generated roadmap before presenting to user.

    Evaluates the roadmap against quality criteria and either approves it
    or provides specific feedback for improvement. This implements the
    self-reflection pattern to catch issues before humans see them.

    The system prompt instructs the LLM to reply with JSON wrapped in a
    ```json code fence, so any fence is stripped before parsing. Without
    that step a fully compliant reply would raise JSONDecodeError and hit
    the auto-pass fallback, silently disabling reflection.

    Args:
        state: Current workflow state with the generated roadmap.

    Returns:
        Dict with reflection results:
            - reflection_passed: Whether the roadmap passed review
            - reflection_feedback: Specific feedback if failed
            - reflection_iterations: Incremented iteration count
    """
    import json
    import re

    logger.info("node_started", node="reflect_on_roadmap",
                iteration=state.reflection_iterations)

    # Build context for reflection
    files_list = "\n".join(f"- {f.path}" for f in state.pr_context.files)

    context_str = f"""## PR Context
Title: {state.pr_context.metadata.title}
Changed Files:
{files_list}

## Generated Roadmap
{state.roadmap}

## Previous Feedback (if any)
{state.reflection_feedback or "None - first review"}
"""

    prompt = ChatPromptTemplate.from_messages([
        ("system", REFLECT_ON_ROADMAP_SYSTEM_PROMPT),
        ("human", "{context}")
    ])

    chain = prompt | _get_llm_instance()
    response = chain.invoke({"context": context_str})

    # The prompt asks for a ```json fenced reply; extract the fenced payload
    # if present, otherwise fall back to the raw content.
    raw = response.content.strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", raw, re.DOTALL)
    payload = fenced.group(1) if fenced else raw

    try:
        result = json.loads(payload)
        passed = result.get("passed", False)
        feedback = result.get("feedback", "")
        notes = result.get("notes", "")
    except json.JSONDecodeError:
        # Fail open: an unparseable self-review should not block the roadmap.
        logger.warning("reflection_response_not_json", content=response.content[:200])
        passed = True
        feedback = ""
        notes = "Self-review: completed (non-JSON response)"

    if passed:
        logger.info("reflection_passed", notes=notes)
    else:
        logger.info("reflection_failed", feedback=feedback)

    return {
        "reflection_passed": passed,
        "reflection_feedback": feedback,
        "reflection_iterations": state.reflection_iterations + 1,
    }
30 changes: 30 additions & 0 deletions review_roadmap/agent/prompts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# System Prompts for the Agent

# Maximum number of self-reflection retry loops before the roadmap is
# accepted as-is. Checked after each reflection pass to bound cost/latency
# when the reviewer LLM keeps rejecting the draft.
MAX_REFLECTION_ITERATIONS = 2

ANALYZE_STRUCTURE_SYSTEM_PROMPT = """You are a Senior Software Architect.

Analyze the list of changed files and group them into logical components (e.g., 'Backend API', 'Frontend Components', 'Database Schema', 'Configuration').
Expand Down Expand Up @@ -42,3 +45,30 @@

Do not be generic. Be specific to the file paths and names provided.
"""

# System prompt for the self-review (reflection) node. NOTE: the doubled
# braces ({{ }}) in the JSON examples are intentional — this string is fed
# through ChatPromptTemplate, which treats single braces as template
# variables, so literal braces must be escaped by doubling.
# NOTE(review): the prompt shows the JSON inside ```json fences, so the
# consumer must tolerate a fenced reply when parsing — confirm the parsing
# side handles this.
REFLECT_ON_ROADMAP_SYSTEM_PROMPT = """You are a Senior Staff Engineer reviewing a PR review roadmap before it's shown to a human reviewer.

## Your Task
Critically evaluate this roadmap from the perspective of someone who will use it to review the PR.

## Checklist
1. **Completeness**: Are all changed files mentioned? Is anything important missing?
2. **Logical Order**: Does the suggested review order make sense? Would a reviewer get confused?
3. **Specificity**: Are the "watch outs" specific to THIS PR, or generic boilerplate?
4. **Deep Links**: Are file references actionable (include links where provided)?
5. **Accuracy**: Do the summaries match the actual file changes described?
6. **Assumptions**: Are there unstated assumptions that should be made explicit?

## Response Format
If the roadmap passes review, respond with EXACTLY this JSON:
```json
{{"passed": true, "notes": "Self-review: [brief note on quality]"}}
```

If issues need fixing, respond with EXACTLY this JSON:
```json
{{"passed": false, "feedback": "[specific issues to fix, be concise]"}}
```

Be rigorous but not pedantic. Only fail roadmaps with genuine issues that would confuse or mislead a reviewer.
"""
19 changes: 19 additions & 0 deletions review_roadmap/agent/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,18 @@ class ReviewState(BaseModel):
2. analyze_structure: populates topology
3. context_expansion: populates fetched_content (if needed)
4. draft_roadmap: populates roadmap (final output)
5. reflect_on_roadmap: self-reviews and optionally triggers retry

Attributes:
pr_context: Input PR data including metadata, files, and comments.
topology: Analysis of file groupings from the structure analysis node.
required_context: File paths identified for fetching (intermediate).
fetched_content: Additional file contents fetched for context.
roadmap: The final generated Markdown roadmap (output).
reflection_feedback: Feedback from self-reflection step for improvements.
reflection_passed: Whether the roadmap passed self-review.
reflection_iterations: Number of reflection iterations completed.
skip_reflection: Whether to skip the self-reflection step entirely.
"""

# Input
Expand All @@ -45,3 +50,17 @@ class ReviewState(BaseModel):

# Output
roadmap: str = ""

# Reflection
reflection_feedback: str = Field(
default="", description="Feedback from self-reflection step"
)
reflection_passed: bool = Field(
default=False, description="Whether the roadmap passed self-review"
)
reflection_iterations: int = Field(
default=0, description="Number of reflection iterations completed"
)
skip_reflection: bool = Field(
default=False, description="Whether to skip the self-reflection step"
)
11 changes: 8 additions & 3 deletions review_roadmap/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def format_pr_comment(roadmap_content: str) -> str:
def generate(
pr_url: str = typer.Argument(..., help="GitHub PR URL (e.g., owner/repo/123) or 'owner/repo/123' string"),
output: str = typer.Option(None, "--output", "-o", help="Output file for the roadmap"),
post: bool = typer.Option(False, "--post", "-p", help="Post the roadmap as a comment on the PR")
post: bool = typer.Option(False, "--post", "-p", help="Post the roadmap as a comment on the PR"),
no_reflection: bool = typer.Option(False, "--no-reflection", help="Skip the self-reflection step")
):
"""Generates a review roadmap for a given Pull Request."""

Expand Down Expand Up @@ -94,9 +95,13 @@ def generate(
console.print(f"[green]Found PR: {pr_context.metadata.title} (Changed files: {len(pr_context.files)})[/green]")

# 2. Run LangGraph
console.print("[bold purple]Generating Roadmap (this may take a minute)...[/bold purple]")
if no_reflection:
console.print("[bold purple]Generating Roadmap (reflection disabled)...[/bold purple]")
else:
console.print("[bold purple]Generating Roadmap with self-reflection (this may take a minute)...[/bold purple]")

graph = build_graph()
initial_state = {"pr_context": pr_context}
initial_state = {"pr_context": pr_context, "skip_reflection": no_reflection}

result = graph.invoke(initial_state)
roadmap_content = result.get("roadmap", "")
Expand Down
Loading