diff --git a/.bumpversion.toml b/.bumpversion.toml index face748..8ebe12e 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -33,7 +33,7 @@ search = 'version = "{current_version}"' replace = 'version = "{new_version}"' [[tool.bumpversion.files]] -filename = "src/__version__.py" +filename = "src/codebase_rag/__version__.py" search = '__version__ = "{current_version}"' replace = '__version__ = "{new_version}"' diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 1117c02..ebb3172 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -30,7 +30,7 @@ jobs: echo "pyproject.toml version: $PROJECT_VERSION" # Get version from __version__.py - VERSION_PY=$(grep '__version__ = ' src/__version__.py | cut -d'"' -f2) + VERSION_PY=$(grep '__version__ = ' src/codebase_rag/__version__.py | cut -d'"' -f2) echo "__version__.py version: $VERSION_PY" # Validate Python version file @@ -70,7 +70,7 @@ jobs: bun-version: latest - name: Build Frontend - run: ./build-frontend.sh + run: ./scripts/build-frontend.sh - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -121,7 +121,7 @@ jobs: bun-version: latest - name: Build Frontend - run: ./build-frontend.sh + run: ./scripts/build-frontend.sh - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -172,7 +172,7 @@ jobs: bun-version: latest - name: Build Frontend - run: ./build-frontend.sh + run: ./scripts/build-frontend.sh - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml index 875323f..bcd490d 100644 --- a/.github/workflows/docs-deploy.yml +++ b/.github/workflows/docs-deploy.yml @@ -70,4 +70,4 @@ jobs: - name: Notify deployment run: | echo "๐Ÿ“š Documentation deployed successfully!" 
- echo "๐Ÿ”— URL: https://code-graph.vantagecraft.dev" + echo "๐Ÿ”— URL: https://vantagecraft.dev/docs/code-graph/" diff --git a/Dockerfile b/Dockerfile index 09cfbef..be73276 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,10 +50,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Copy application source code for local package installation COPY pyproject.toml README.md ./ -COPY api ./api -COPY core ./core -COPY services ./services -COPY mcp_tools ./mcp_tools +COPY src ./src COPY *.py ./ # Install local package (without dependencies, already installed) @@ -95,7 +92,7 @@ COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/ COPY --chown=appuser:appuser . . # Copy pre-built frontend (if exists) -# Run ./build-frontend.sh before docker build to generate frontend/dist +# Run ./scripts/build-frontend.sh before docker build to generate frontend/dist # If frontend/dist doesn't exist, the app will run as API-only (no web UI) RUN if [ -d frontend/dist ]; then \ mkdir -p static && \ @@ -103,7 +100,7 @@ RUN if [ -d frontend/dist ]; then \ echo "โœ… Frontend copied to static/"; \ else \ echo "โš ๏ธ No frontend/dist found - running as API-only"; \ - echo " Run ./build-frontend.sh to build frontend"; \ + echo " Run ./scripts/build-frontend.sh to build frontend"; \ fi # Switch to non-root user @@ -129,6 +126,6 @@ EXPOSE 8000 8080 HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ CMD curl -f http://localhost:8080/api/v1/health || exit 1 -# Default command - starts HTTP API (not MCP) -# For MCP service, run on host: python start_mcp.py -CMD ["python", "start.py"] +# Default command - starts both MCP and Web services (dual-port mode) +# Alternative: python -m codebase_rag --mcp (MCP only) or --web (Web only) +CMD ["python", "-m", "codebase_rag"] diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index ee7c6ea..d29e2f0 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -2,7 +2,7 @@ # Base Docker image for Code Graph 
Knowledge System # # IMPORTANT: Frontend MUST be pre-built before docker build: -# ./build-frontend.sh +# ./scripts/build-frontend.sh # # This Dockerfile expects frontend/dist/ to exist @@ -53,7 +53,7 @@ COPY --chown=appuser:appuser services ./services COPY --chown=appuser:appuser mcp_tools ./mcp_tools COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./ -# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first) +# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first) COPY --chown=appuser:appuser frontend/dist ./static USER appuser diff --git a/docker/Dockerfile.full b/docker/Dockerfile.full index 6c4cf9e..4393b4d 100644 --- a/docker/Dockerfile.full +++ b/docker/Dockerfile.full @@ -2,7 +2,7 @@ # Full Docker image - All features (LLM + Embedding required) # # IMPORTANT: Frontend MUST be pre-built before docker build: -# ./build-frontend.sh +# ./scripts/build-frontend.sh # # This Dockerfile expects frontend/dist/ to exist @@ -48,13 +48,9 @@ COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/pytho COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/ # Copy application code -COPY --chown=appuser:appuser api ./api -COPY --chown=appuser:appuser core ./core -COPY --chown=appuser:appuser services ./services -COPY --chown=appuser:appuser mcp_tools ./mcp_tools -COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./ +COPY --chown=appuser:appuser src ./src -# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first) +# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first) COPY --chown=appuser:appuser frontend/dist ./static USER appuser @@ -67,4 +63,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ CMD curl -f http://localhost:8080/api/v1/health || exit 1 # Start application (dual-port mode) -CMD ["python", "main.py"] +CMD ["python", "-m", "codebase_rag"] diff --git 
a/docker/Dockerfile.minimal b/docker/Dockerfile.minimal index a711734..3b64626 100644 --- a/docker/Dockerfile.minimal +++ b/docker/Dockerfile.minimal @@ -2,7 +2,7 @@ # Minimal Docker image - Code Graph only (No LLM required) # # IMPORTANT: Frontend MUST be pre-built before docker build: -# ./build-frontend.sh +# ./scripts/build-frontend.sh # # This Dockerfile expects frontend/dist/ to exist @@ -48,13 +48,9 @@ COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/pytho COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/ # Copy application code -COPY --chown=appuser:appuser api ./api -COPY --chown=appuser:appuser core ./core -COPY --chown=appuser:appuser services ./services -COPY --chown=appuser:appuser mcp_tools ./mcp_tools -COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./ +COPY --chown=appuser:appuser src ./src -# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first) +# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first) COPY --chown=appuser:appuser frontend/dist ./static USER appuser @@ -67,4 +63,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ CMD curl -f http://localhost:8080/api/v1/health || exit 1 # Start application (dual-port mode) -CMD ["python", "main.py"] +CMD ["python", "-m", "codebase_rag"] diff --git a/docker/Dockerfile.standard b/docker/Dockerfile.standard index df53260..f461b19 100644 --- a/docker/Dockerfile.standard +++ b/docker/Dockerfile.standard @@ -2,7 +2,7 @@ # Standard Docker image - Code Graph + Memory Store (Embedding required) # # IMPORTANT: Frontend MUST be pre-built before docker build: -# ./build-frontend.sh +# ./scripts/build-frontend.sh # # This Dockerfile expects frontend/dist/ to exist @@ -48,13 +48,9 @@ COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/pytho COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/ # Copy application code -COPY --chown=appuser:appuser api 
./api -COPY --chown=appuser:appuser core ./core -COPY --chown=appuser:appuser services ./services -COPY --chown=appuser:appuser mcp_tools ./mcp_tools -COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./ +COPY --chown=appuser:appuser src ./src -# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first) +# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first) COPY --chown=appuser:appuser frontend/dist ./static USER appuser @@ -67,4 +63,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ CMD curl -f http://localhost:8080/api/v1/health || exit 1 # Start application (dual-port mode) -CMD ["python", "main.py"] +CMD ["python", "-m", "codebase_rag"] diff --git a/docs/CNAME b/docs/CNAME deleted file mode 100644 index 0e88ad8..0000000 --- a/docs/CNAME +++ /dev/null @@ -1 +0,0 @@ -code-graph.vantagecraft.dev diff --git a/docs/api/mcp-tools.md b/docs/api/mcp-tools.md index dac4011..eda0b2a 100644 --- a/docs/api/mcp-tools.md +++ b/docs/api/mcp-tools.md @@ -30,7 +30,7 @@ The MCP server provides AI assistants (like Claude Desktop, VS Code with MCP, et ```bash # Using start script -python start_mcp.py +python -m codebase_rag --mcp # Using uv (recommended) uv run mcp_client diff --git a/docs/api/python-sdk.md b/docs/api/python-sdk.md index 47aaa52..d1235eb 100644 --- a/docs/api/python-sdk.md +++ b/docs/api/python-sdk.md @@ -106,12 +106,12 @@ OPENROUTER_MODEL=anthropic/claude-3-opus ### Import Services ```python -from services.neo4j_knowledge_service import Neo4jKnowledgeService -from services.memory_store import MemoryStore, memory_store -from services.graph_service import Neo4jGraphService, graph_service -from services.code_ingestor import CodeIngestor, get_code_ingestor -from services.task_queue import TaskQueue, task_queue -from config import settings +from src.codebase_rag.services.knowledge import Neo4jKnowledgeService +from src.codebase_rag.services.memory import MemoryStore, 
memory_store +from src.codebase_rag.services.code import Neo4jGraphService, graph_service +from src.codebase_rag.services.code import CodeIngestor, get_code_ingestor +from src.codebase_rag.services.tasks import TaskQueue, task_queue +from src.codebase_rag.config import settings ``` ### Service Initialization Pattern @@ -148,7 +148,7 @@ Primary service for knowledge graph operations with LlamaIndex integration. ### Initialization ```python -from services.neo4j_knowledge_service import Neo4jKnowledgeService +from src.codebase_rag.services.knowledge import Neo4jKnowledgeService # Create instance knowledge_service = Neo4jKnowledgeService() @@ -405,7 +405,7 @@ Project memory persistence for AI agents. ### Initialization ```python -from services.memory_store import memory_store +from src.codebase_rag.services.memory import memory_store # Initialize (async) await memory_store.initialize() @@ -627,7 +627,7 @@ Low-level Neo4j graph operations. ### Initialization ```python -from services.graph_service import graph_service +from src.codebase_rag.services.code import graph_service # Connect to Neo4j await graph_service.connect() @@ -791,8 +791,8 @@ Repository code ingestion service. ### Initialization ```python -from services.code_ingestor import get_code_ingestor -from services.graph_service import graph_service +from src.codebase_rag.services.code import get_code_ingestor +from src.codebase_rag.services.code import graph_service # Initialize graph service first await graph_service.connect() @@ -882,7 +882,7 @@ Asynchronous task queue management. 
### Initialization ```python -from services.task_queue import task_queue, TaskStatus +from src.codebase_rag.services.tasks import task_queue, TaskStatus # Start task queue await task_queue.start() @@ -921,7 +921,7 @@ async def submit_task( **Example**: ```python -from services.task_processors import process_document_task +from src.codebase_rag.services.tasks import process_document_task task_id = await task_queue.submit_task( task_func=process_document_task, @@ -1005,7 +1005,7 @@ def get_queue_stats() -> Dict[str, int]: Access configuration settings. ```python -from config import settings +from src.codebase_rag.config import settings # Neo4j settings print(settings.neo4j_uri) @@ -1034,7 +1034,7 @@ print(settings.top_k) ### Get Current Model Info ```python -from config import get_current_model_info +from src.codebase_rag.config import get_current_model_info model_info = get_current_model_info() print(f"LLM: {model_info['llm']}") @@ -1049,7 +1049,7 @@ print(f"Embedding: {model_info['embedding']}") ```python import asyncio -from services.neo4j_knowledge_service import Neo4jKnowledgeService +from src.codebase_rag.services.knowledge import Neo4jKnowledgeService async def main(): # Initialize service @@ -1087,7 +1087,7 @@ asyncio.run(main()) ```python import asyncio -from services.memory_store import memory_store +from src.codebase_rag.services.memory import memory_store async def main(): # Initialize @@ -1128,9 +1128,9 @@ asyncio.run(main()) ```python import asyncio -from services.graph_service import graph_service -from services.code_ingestor import get_code_ingestor -from services.git_utils import git_utils +from src.codebase_rag.services.code import graph_service +from src.codebase_rag.services.code import get_code_ingestor +from src.codebase_rag.services.git_utils import git_utils async def main(): # Connect to Neo4j @@ -1178,8 +1178,8 @@ asyncio.run(main()) ```python import asyncio -from services.task_queue import task_queue, TaskStatus -from 
services.task_processors import process_document_task +from src.codebase_rag.services.tasks import task_queue, TaskStatus +from src.codebase_rag.services.tasks import process_document_task async def main(): # Start task queue @@ -1318,7 +1318,7 @@ result = await session.run("MATCH (n) RETURN n LIMIT 10") ### 4. Set Appropriate Timeouts ```python -from config import settings +from src.codebase_rag.config import settings # Adjust timeouts for large operations settings.operation_timeout = 300 # 5 minutes @@ -1439,7 +1439,7 @@ for item in items: ```python # 60x faster for updates -from services.git_utils import git_utils +from src.codebase_rag.services.git_utils import git_utils if git_utils.is_git_repo(repo_path): changed_files = git_utils.get_changed_files(repo_path) diff --git a/docs/architecture/components.md b/docs/architecture/components.md index 1925b68..8bdafb8 100644 --- a/docs/architecture/components.md +++ b/docs/architecture/components.md @@ -1482,7 +1482,7 @@ Critical for avoiding circular dependencies: ```python # 1. Configuration (no dependencies) -from config import settings +from src.codebase_rag.config import settings # 2. Storage layer (no app dependencies) neo4j_connection = Neo4jGraphStore(...) @@ -1555,7 +1555,7 @@ class Settings(BaseSettings): Components access configuration: ```python -from config import settings +from src.codebase_rag.config import settings # Use in service self.timeout = settings.operation_timeout diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md new file mode 100644 index 0000000..1c9553a --- /dev/null +++ b/docs/architecture/overview.md @@ -0,0 +1,537 @@ +# Architecture Overview + +## Introduction + +Code Graph Knowledge System is a **hybrid intelligence platform** that serves both human users and AI agents through multiple interfaces. This document explains the system architecture, deployment modes, and how different components work together. 
+ +## System Architecture + +### Dual-Server Design + +The system operates on **two independent ports**, each serving different purposes: + +```mermaid +graph TB + subgraph "Port 8000 - MCP SSE Service (PRIMARY)" + MCP[MCP Server] + SSE[SSE Streaming] + MCP_TOOLS[25+ MCP Tools] + end + + subgraph "Port 8080 - Web UI + REST API (SECONDARY)" + WEB[React Frontend] + REST[REST API] + METRICS[Prometheus Metrics] + end + + subgraph "Shared Backend Services" + NEO4J[Neo4j Knowledge Store] + TASK[Task Queue] + MEMORY[Memory Store] + CODE[Code Graph] + end + + AI[AI Assistants
Claude Desktop, Cursor] USERS[Human Users<br/>
Developers, Admins] PROGRAMS[External Systems<br/>
CI/CD, Scripts] + + AI -->|stdio/SSE| MCP + USERS -->|Browser| WEB + PROGRAMS -->|HTTP| REST + + MCP --> NEO4J + MCP --> TASK + MCP --> MEMORY + MCP --> CODE + + WEB --> NEO4J + REST --> TASK + REST --> MEMORY + REST --> CODE + + SSE -.->|Real-time updates| WEB + + style MCP fill:#e1f5e1 + style WEB fill:#e3f2fd + style REST fill:#fff9e6 +``` + +### Port 8000: MCP SSE Service + +**Purpose**: AI assistant integration and real-time communication + +**Components**: +- **MCP Protocol Server**: stdio-based communication for AI tools +- **SSE Endpoint** (`/sse`): Server-Sent Events for real-time updates +- **Message Endpoint** (`/messages/`): Async message handling + +**Primary Users**: +- AI assistants (Claude Desktop, Cursor, etc.) +- Development tools with MCP support + +**Key Features**: +- 25+ MCP tools for code intelligence +- Real-time task monitoring via SSE +- Bi-directional communication with AI agents + +### Port 8080: Web UI + REST API + +**Purpose**: Human interaction and programmatic access + +**Components**: +- **React Frontend**: Task monitoring, file upload, batch processing +- **REST API** (`/api/v1/*`): Full HTTP API for all system features +- **Prometheus Metrics** (`/metrics`): System health and performance + +**Primary Users**: +- Developers (via web browser) +- System administrators +- External applications (via HTTP API) +- CI/CD pipelines +- Custom integrations + +**Key Features**: +- Visual task monitoring dashboard +- Document upload and management +- System configuration and health monitoring +- Programmatic API access + +--- + +## Understanding the REST API + +### What is the REST API? + +The REST API provides **HTTP-based programmatic access** to all system capabilities. It allows external applications, scripts, and services to interact with the knowledge system without requiring MCP protocol support. + +### Why Do We Need REST API? + +While MCP protocol serves AI assistants, REST API enables broader integration scenarios: + +#### 1. 
**System Integration** +Connect Code Graph with existing enterprise tools: + +```mermaid +graph LR + A[CI/CD Pipeline<br/>
GitHub Actions] -->|POST /ingest/repo| API[REST API] + B[Slack Bot] -->|POST /knowledge/query| API + C[IDE Plugin] -->|GET /graph/related| API + D[Monitoring Dashboard] -->|GET /health| API + + API --> SERVICES[Backend Services] + + style API fill:#fff9e6 +``` + +**Example**: Automatically analyze code on every commit: +```yaml +# .github/workflows/analyze.yml +- name: Analyze Code + run: | + curl -X POST http://code-graph:8080/api/v1/ingest/repo \ + -H "Content-Type: application/json" \ + -d '{"local_path": ".", "mode": "incremental"}' +``` + +#### 2. **Custom Application Development** + +Build your own interfaces on top of Code Graph: + +```javascript +// Internal chatbot +async function askCodeQuestion(question) { + const response = await fetch('http://code-graph:8080/api/v1/knowledge/query', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ question, mode: 'hybrid' }) + }); + return await response.json(); +} +``` + +#### 3. **Automation and Scripting** + +Automate repetitive tasks: + +```python +# Daily documentation sync script +import httpx + +async def sync_docs(): + # Upload latest docs to knowledge base + response = await httpx.post( + "http://code-graph:8080/api/v1/documents/directory", + json={ + "directory_path": "/company/docs", + "recursive": True + } + ) + return response.json() +``` + +#### 4. 
**Cross-Language Support** + +Access from any programming language: + +```go +// Go client +func searchCode(query string) ([]Result, error) { + resp, err := http.Post( + "http://code-graph:8080/api/v1/knowledge/search", + "application/json", + bytes.NewBuffer([]byte(fmt.Sprintf(`{"query":"%s"}`, query))), + ) + // Parse and return results +} +``` + +### REST API vs MCP Protocol + +| Feature | REST API | MCP Protocol | +|---------|----------|--------------| +| **Transport** | HTTP/HTTPS | stdio / SSE | +| **Format** | JSON over HTTP | JSON-RPC | +| **Client** | Any language | AI assistants | +| **Authentication** | API keys (future) | N/A | +| **Use Case** | System integration | AI context enhancement | +| **Examples** | curl, Python, JS | Claude Desktop | + +**When to use REST API**: +- โœ… Integrating with CI/CD +- โœ… Building custom UIs +- โœ… Scripting and automation +- โœ… Cross-language access +- โœ… Webhook integrations + +**When to use MCP Protocol**: +- โœ… AI assistant integration +- โœ… IDE plugin development +- โœ… Real-time AI interactions + +--- + +## Deployment Modes + +### Three Usage Scenarios + +The system provides three startup modes for different scenarios: + +#### 1. MCP Server Only (`codebase-rag-mcp`) + +**Purpose**: AI assistant integration + +**What it starts**: +- MCP protocol server (stdio) +- Direct communication with AI tools + +**Use when**: +- Using with Claude Desktop +- Developing MCP-compatible tools +- AI-only workflows + +**Example**: +```bash +# Start MCP server +codebase-rag-mcp + +# Configure Claude Desktop +{ + "mcpServers": { + "code-graph": { + "command": "codebase-rag-mcp" + } + } +} +``` + +#### 2. 
Web Server (`codebase-rag-web`) + +**Purpose**: Full-featured deployment for human users and applications + +**What it starts**: +- Port 8000: MCP SSE service +- Port 8080: React frontend + REST API + +**Use when**: +- Deploying for team usage +- Need visual monitoring +- Require programmatic access +- Production environments + +**Example**: +```bash +# Start web server +codebase-rag-web + +# Access: +# - Web UI: http://localhost:8080 +# - REST API: http://localhost:8080/api/v1/ +# - MCP SSE: http://localhost:8000/sse +``` + +#### 3. Complete Service (`codebase-rag`) + +**Purpose**: Development and comprehensive deployment + +**What it starts**: +- Everything from web server mode +- Full system capabilities +- All interfaces available + +**Use when**: +- Local development +- Testing all features +- Production deployment with all services + +--- + +## Component Architecture + +### Backend Services + +All backend services are shared across both ports: + +#### 1. **Neo4j Knowledge Store** +- Graph database for code relationships +- Native vector index for semantic search +- Hybrid query engine + +#### 2. **Task Queue** +- Asynchronous processing for heavy operations +- Real-time progress tracking +- Retry and error handling + +#### 3. **Memory Store** +- Project knowledge persistence +- Decision and preference tracking +- Temporal knowledge management + +#### 4. 
**Code Graph Service** +- Repository ingestion and analysis +- Symbol relationship tracking +- Impact analysis engine + +### Frontend Components + +#### React Web UI +- **Task Monitor**: Real-time progress visualization +- **Document Upload**: File and directory processing +- **System Dashboard**: Health and statistics +- **Configuration**: System settings management + +Built with: +- **React** + **TanStack Router**: Modern SPA +- **TanStack Query**: Data fetching and caching +- **Tailwind CSS**: Responsive design +- **Recharts**: Data visualization + +--- + +## Data Flow + +### Typical Request Flows + +#### AI Assistant Query Flow + +```mermaid +sequenceDiagram + participant AI as AI Assistant + participant MCP as MCP Server :8000 + participant Services as Backend Services + participant Neo4j as Neo4j Database + + AI->>MCP: MCP Tool Call
query_knowledge + MCP->>Services: Process Query + Services->>Neo4j: Graph + Vector Search + Neo4j-->>Services: Results + Services-->>MCP: Formatted Response + MCP-->>AI: Tool Result +``` + +#### REST API Request Flow + +```mermaid +sequenceDiagram + participant Client as HTTP Client + participant REST as REST API :8080 + participant Queue as Task Queue + participant Services as Backend Services + participant Neo4j as Neo4j Database + + Client->>REST: POST /api/v1/ingest/repo + REST->>Queue: Submit Task + Queue-->>REST: Task ID + REST-->>Client: 202 Accepted
{task_id: "..."} + + Queue->>Services: Process Repository + Services->>Neo4j: Store Code Graph + Neo4j-->>Services: Success + Services-->>Queue: Complete + + Client->>REST: GET /api/v1/tasks/{task_id} + REST-->>Client: Task Status<br/>
{status: "SUCCESS"} +``` + +#### Real-time Monitoring Flow + +```mermaid +sequenceDiagram + participant Browser as Web Browser + participant Frontend as React App :8080 + participant SSE as SSE Endpoint :8000 + participant Queue as Task Queue + + Browser->>Frontend: Open Task Monitor + Frontend->>SSE: Connect SSE<br/>
GET /sse/tasks + SSE-->>Frontend: Connection Established + + loop Real-time Updates + Queue->>SSE: Task Progress Event + SSE-->>Frontend: data: {...} + Frontend->>Browser: Update UI + end +``` + +--- + +## Technology Stack + +### Backend +- **Python 3.13+**: Core runtime +- **FastAPI**: Web framework +- **Neo4j 5.x**: Graph database +- **LlamaIndex**: LLM integration framework +- **Prometheus**: Metrics and monitoring + +### Frontend +- **React 18**: UI framework +- **TypeScript**: Type safety +- **Bun**: Package manager and bundler +- **TanStack Router**: Client-side routing +- **Tailwind CSS**: Styling + +### Integration +- **MCP Protocol**: AI assistant communication +- **Server-Sent Events**: Real-time updates +- **REST API**: HTTP-based access + +### Storage +- **Neo4j**: Primary data store + - Document storage + - Vector embeddings + - Graph relationships + - Memory persistence + +--- + +## Scalability Considerations + +### Horizontal Scaling + +The system supports horizontal scaling: + +```mermaid +graph TB + LB[Load Balancer] + + subgraph "Web Servers" + W1[Server 1:8080] + W2[Server 2:8080] + W3[Server N:8080] + end + + subgraph "MCP Servers" + M1[Server 1:8000] + M2[Server 2:8000] + M3[Server N:8000] + end + + subgraph "Shared State" + NEO4J[(Neo4j Cluster)] + REDIS[(Redis Cache)] + end + + LB --> W1 + LB --> W2 + LB --> W3 + + LB --> M1 + LB --> M2 + LB --> M3 + + W1 --> NEO4J + W2 --> NEO4J + W3 --> NEO4J + + M1 --> NEO4J + M2 --> NEO4J + M3 --> NEO4J + + W1 -.-> REDIS + W2 -.-> REDIS + W3 -.-> REDIS +``` + +### Performance Optimization + +1. **Task Queue**: Offload heavy operations +2. **Caching**: Redis for frequently accessed data +3. **Connection Pooling**: Efficient database connections +4. 
**Incremental Processing**: Only process changed files + +--- + +## Security Architecture + +### Current Security Model + +**Authentication**: Currently no authentication required (development mode) + +**Network Security**: +- Bind to localhost by default +- Configurable host/port via environment variables + +**Data Security**: +- No sensitive data storage by default +- User responsible for network security + +### Future Enhancements + +Planned security features: + +1. **API Authentication**: + - JWT token authentication + - API key management + - Role-based access control (RBAC) + +2. **Data Encryption**: + - TLS/HTTPS support + - At-rest encryption for sensitive data + +3. **Audit Logging**: + - Request logging + - Access tracking + - Change history + +--- + +## Summary + +Code Graph Knowledge System is a multi-interface platform that serves: + +1. **AI Assistants**: Via MCP protocol on port 8000 +2. **Human Users**: Via React UI on port 8080 +3. **External Systems**: Via REST API on port 8080 + +This architecture enables: +- โœ… Flexible deployment modes +- โœ… Broad integration possibilities +- โœ… Scalable multi-user support +- โœ… Real-time monitoring and feedback + +Choose your deployment mode based on your needs: +- **MCP only**: AI assistant integration +- **Web server**: Team collaboration + API access +- **Complete service**: Full-featured deployment + +For detailed API documentation, see [REST API Reference](../api/rest.md). diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md index c8417f9..cf21d1e 100644 --- a/docs/deployment/docker.md +++ b/docs/deployment/docker.md @@ -81,7 +81,7 @@ http://localhost:8080/api/v1/ curl -fsSL https://bun.sh/install | bash # Build frontend -./build-frontend.sh +./scripts/build-frontend.sh ``` This pre-builds the React frontend and generates static files in `frontend/dist/`, which are then copied into the Docker image. 
The production image does not include Node.js, npm, or any frontend build tools (~405MB savings). @@ -94,7 +94,7 @@ git clone https://github.com/royisme/codebase-rag.git cd codebase-rag # Build frontend first (REQUIRED) -./build-frontend.sh +./scripts/build-frontend.sh # Build minimal docker build -f docker/Dockerfile.minimal -t my-codebase-rag:minimal . diff --git a/docs/development/contributing.md b/docs/development/contributing.md index 864b6dc..74eb77d 100644 --- a/docs/development/contributing.md +++ b/docs/development/contributing.md @@ -188,8 +188,8 @@ from fastapi import FastAPI, HTTPException from neo4j import GraphDatabase # Local imports -from services.neo4j_knowledge_service import Neo4jKnowledgeService -from core.config import settings +from src.codebase_rag.services.knowledge import Neo4jKnowledgeService +from src.codebase_rag.core.config import settings ``` **Type Hints:** diff --git a/docs/development/migration-guide.md b/docs/development/migration-guide.md new file mode 100644 index 0000000..88cd2a5 --- /dev/null +++ b/docs/development/migration-guide.md @@ -0,0 +1,413 @@ +# Migration Guide: v0.7.x to v0.8.0 + +Complete guide for migrating from the old directory structure to the new src-layout. + +**Release Date**: 2025-11-06 +**Breaking Changes**: Yes +**Migration Effort**: Low (15-30 minutes) + +--- + +## ๐Ÿ“‹ Summary of Changes + +Version 0.8.0 introduces a complete restructuring to adopt Python's standard src-layout. This brings better organization, clearer package boundaries, and follows Python best practices. + +### Major Changes + +1. **All code moved to `src/codebase_rag/`** +2. **All old entry scripts removed** +3. **Import paths updated** +4. **New standardized entry points** +5. **Backward compatibility removed** + +--- + +## ๐Ÿšจ Breaking Changes + +### 1. 
Entry Scripts Removed + +**Old** (โŒ No longer works): +```bash +python start.py +python start_mcp.py +python main.py +``` + +**New** (โœ… Use these instead): +```bash +# Direct module invocation +python -m codebase_rag # Start both services +python -m codebase_rag --web # Web only +python -m codebase_rag --mcp # MCP only +python -m codebase_rag --version + +# After installation (pip install -e .) +codebase-rag # Main CLI +codebase-rag-web # Web server +codebase-rag-mcp # MCP server +``` + +### 2. Import Paths Changed + +**Old** (โŒ No longer works): +```python +from config import settings +from services.neo4j_knowledge_service import Neo4jKnowledgeService +from services.memory_store import MemoryStore +from core.app import create_app +from api.routes import router +from mcp_tools.utils import some_function +``` + +**New** (โœ… Use these instead): +```python +from src.codebase_rag.config import settings +from src.codebase_rag.services.knowledge import Neo4jKnowledgeService +from src.codebase_rag.services.memory import MemoryStore +from src.codebase_rag.core.app import create_app +from src.codebase_rag.api.routes import router +from src.codebase_rag.mcp.utils import some_function +``` + +### 3. Directory Structure Changed + +**Old Structure** (โŒ Removed): +``` +codebase-rag/ +โ”œโ”€โ”€ api/ # โŒ Deleted +โ”œโ”€โ”€ core/ # โŒ Deleted +โ”œโ”€โ”€ services/ # โŒ Deleted +โ”œโ”€โ”€ mcp_tools/ # โŒ Deleted +โ”œโ”€โ”€ config.py # โŒ Deleted +โ”œโ”€โ”€ main.py # โŒ Deleted +โ”œโ”€โ”€ start.py # โŒ Deleted +โ””โ”€โ”€ start_mcp.py # โŒ Deleted +``` + +**New Structure** (โœ… Current): +``` +codebase-rag/ +โ”œโ”€โ”€ src/ +โ”‚ โ””โ”€โ”€ codebase_rag/ # โœ… All code here +โ”‚ โ”œโ”€โ”€ __init__.py +โ”‚ โ”œโ”€โ”€ __main__.py +โ”‚ โ”œโ”€โ”€ config/ +โ”‚ โ”œโ”€โ”€ server/ +โ”‚ โ”œโ”€โ”€ core/ +โ”‚ โ”œโ”€โ”€ api/ +โ”‚ โ”œโ”€โ”€ services/ +โ”‚ โ””โ”€โ”€ mcp/ # Renamed from mcp_tools +โ”œโ”€โ”€ pyproject.toml # โœ… Updated +โ”œโ”€โ”€ docs/ +โ”œโ”€โ”€ tests/ +โ””โ”€โ”€ ... 
+``` + +### 4. Docker Changes + +**Dockerfile CMD** changed: + +```dockerfile +# Old +CMD ["python", "start.py"] + +# New +CMD ["python", "-m", "codebase_rag"] +``` + +--- + +## ๐Ÿ”„ Migration Steps + +### For End Users (Docker Deployment) + +If you're using Docker, **no changes needed**! Just pull the new image: + +```bash +# Pull latest +docker pull royisme/codebase-rag:latest + +# Or rebuild +docker-compose down +docker-compose pull +docker-compose up -d +``` + +### For Developers (Local Development) + +#### Step 1: Update Repository + +```bash +# Pull latest changes +git pull origin main + +# Or if on a branch +git fetch origin +git rebase origin/main +``` + +#### Step 2: Reinstall Package + +```bash +# Remove old installation +pip uninstall code-graph -y + +# Reinstall with new structure +pip install -e . + +# Or with uv +uv pip install -e . +``` + +#### Step 3: Update Your Code + +**Update all import statements** in your custom scripts/tools: + +```python +# Old imports (need to update) +from config import settings +from services.xxx import Yyy + +# New imports +from src.codebase_rag.config import settings +from src.codebase_rag.services.xxx import Yyy +``` + +**Find all files to update:** +```bash +# Search for old imports in your codebase +grep -r "from config import" . +grep -r "from services\." . +grep -r "from core\." . +grep -r "from api\." . +grep -r "from mcp_tools\." . 
+``` + +#### Step 4: Update Entry Scripts + +If you have custom scripts that call the server: + +```python +# Old +if __name__ == "__main__": + from start import main + main() + +# New +if __name__ == "__main__": + from src.codebase_rag.server.web import main + main() +``` + +Or better, use the standard module invocation: + +```python +import subprocess +subprocess.run(["python", "-m", "codebase_rag"]) +``` + +#### Step 5: Update MCP Configurations + +If using MCP (Claude Desktop, Cursor, etc.): + +**Old** `claude_desktop_config.json`: +```json +{ + "mcpServers": { + "codebase-rag": { + "command": "python", + "args": ["/path/to/codebase-rag/start_mcp.py"] + } + } +} +``` + +**New**: +```json +{ + "mcpServers": { + "codebase-rag": { + "command": "python", + "args": ["-m", "codebase_rag", "--mcp"], + "cwd": "/path/to/codebase-rag" + } + } +} +``` + +Or after installation: +```json +{ + "mcpServers": { + "codebase-rag": { + "command": "codebase-rag-mcp" + } + } +} +``` + +--- + +## ๐Ÿงช Testing Your Migration + +After migration, test all functionality: + +### 1. Test Import Paths + +```python +# Test configuration import +from src.codebase_rag.config import settings +print(f"โœ… Config: {settings.app_name}") + +# Test service imports +from src.codebase_rag.services.knowledge import Neo4jKnowledgeService +print("โœ… Services import successful") +``` + +### 2. Test Entry Points + +```bash +# Test version +python -m codebase_rag --version +# Should output: codebase-rag version 0.8.0 + +# Test help +python -m codebase_rag --help + +# Test web server (Ctrl+C to stop) +python -m codebase_rag --web +``` + +### 3. Test Docker + +```bash +# Build test image +docker build -t codebase-rag:test . + +# Run test container +docker run -p 8000:8000 -p 8080:8080 codebase-rag:test + +# Check health +curl http://localhost:8080/api/v1/health +``` + +### 4. 
Run Tests + +```bash +# Run test suite +pytest tests/ -v + +# Run with coverage +pytest tests/ --cov=src/codebase_rag --cov-report=html +``` + +--- + +## ๐Ÿ“ Common Issues + +### Issue 1: ModuleNotFoundError + +**Error:** +``` +ModuleNotFoundError: No module named 'config' +``` + +**Solution:** +Update import to new path: +```python +from src.codebase_rag.config import settings +``` + +### Issue 2: start.py not found + +**Error:** +``` +python: can't open file 'start.py': [Errno 2] No such file or directory +``` + +**Solution:** +Use new entry point: +```bash +python -m codebase_rag +``` + +### Issue 3: Old imports in tests + +**Error:** +``` +ImportError: cannot import name 'Neo4jKnowledgeService' from 'services.neo4j_knowledge_service' +``` + +**Solution:** +Update test imports: +```python +from src.codebase_rag.services.knowledge import Neo4jKnowledgeService +``` + +### Issue 4: Docker container fails to start + +**Error:** +``` +python: can't open file 'start.py' +``` + +**Solution:** +Rebuild Docker image: +```bash +docker-compose down +docker-compose build --no-cache +docker-compose up -d +``` + +--- + +## ๐ŸŽฏ Benefits of New Structure + +### 1. Standard Python Package + +- โœ… Follows PyPA src-layout recommendations +- โœ… Proper package namespace (`codebase_rag`) +- โœ… Cleaner imports + +### 2. Better Organization + +- โœ… All source code in `src/` +- โœ… Clear separation of concerns +- โœ… Logical service grouping + +### 3. Easier Development + +- โœ… Standard entry points (`python -m codebase_rag`) +- โœ… Proper console scripts after installation +- โœ… No confusion about root vs package code + +### 4. 
Improved Maintainability + +- โœ… No duplicate code +- โœ… Clear module boundaries +- โœ… Easier to navigate for new contributors + +--- + +## ๐Ÿ“š Additional Resources + +- [Python Packaging Guide](https://packaging.python.org/en/latest/tutorials/packaging-projects/) +- [src-layout vs flat-layout](https://setuptools.pypa.io/en/latest/userguide/package_discovery.html#src-layout) +- [Development Setup](./setup.md) +- [Python SDK Guide](../api/python-sdk.md) + +--- + +## ๐Ÿ†˜ Need Help? + +If you encounter issues not covered in this guide: + +1. Check [Troubleshooting](../troubleshooting.md) +2. Check [FAQ](../faq.md) +3. Open an issue on GitHub +4. Ask in Discussions + +--- + +**Last Updated**: 2025-11-06 +**Next Version**: 0.9.0 (planned) diff --git a/docs/development/setup.md b/docs/development/setup.md index bae3e3e..ee2507c 100644 --- a/docs/development/setup.md +++ b/docs/development/setup.md @@ -593,7 +593,7 @@ ollama list ```bash # Start the application -python start.py +python -m codebase_rag # You should see: # โœ“ All service health checks passed diff --git a/docs/development/testing.md b/docs/development/testing.md index a2aab34..2db7287 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -81,7 +81,7 @@ import pytest @pytest.mark.unit async def test_parse_memory_type(): """Test memory type parsing logic.""" - from services.memory_store import parse_memory_type + from src.codebase_rag.services.memory import parse_memory_type result = parse_memory_type("decision") assert result == "decision" @@ -513,7 +513,7 @@ def test_with_env_vars(mocker): 'NEO4J_PASSWORD': 'testpass' }) - from core.config import settings + from src.codebase_rag.core.config import settings assert settings.neo4j_uri == 'bolt://test:7687' ``` @@ -678,7 +678,7 @@ and memory relationships. 
 import pytest
 from typing import Dict, Any
 
-from services.memory_store import MemoryStore
+from codebase_rag.services.memory import MemoryStore
 
 
 class TestMemoryStore:
diff --git a/docs/faq.md b/docs/faq.md
index e983278..959ff11 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -158,7 +158,7 @@ pip install -e .
 # Follow: https://neo4j.com/docs/operations-manual/current/installation/
 
 # Configure and run
-python start.py
+python -m codebase_rag
 ```
 
 **Note**: Docker is recommended for easier setup and isolation.
@@ -396,7 +396,7 @@ OPENAI_EMBEDDING_MODEL=text-embedding-3-small
 # Restart
 docker-compose restart api
 # or
-pkill -f start.py && python start.py
+pkill -f "python -m codebase_rag" && python -m codebase_rag
 ```
 
 No data migration needed - embeddings are recalculated automatically.
@@ -887,7 +887,7 @@ jobs:
 ```bash
 # In your build.sh
 python -c "
-from services.memory_store import MemoryStore
+from codebase_rag.services.memory import MemoryStore
 # Auto-extract memories after build
 "
 ```
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
index a3f42b5..c72d4cb 100644
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -119,10 +119,10 @@ cp env.example .env
 nano .env
 
 # Start MCP server
-python start_mcp.py
+python -m codebase_rag --mcp
 
 # Or start FastAPI server
-python start.py
+python -m codebase_rag
 ```
 
 ## Verify Installation
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
index 5a96189..446e3e2 100644
--- a/docs/getting-started/quickstart.md
+++ b/docs/getting-started/quickstart.md
@@ -2,7 +2,25 @@
 
 Get Code Graph Knowledge System up and running in 5 minutes!
-## ๐ŸŽฏ Choose Your Path +## ๐ŸŽฏ Choose Your Deployment Mode + +Code Graph Knowledge System offers **three deployment modes** based on which features you need: + +| Mode | Description | Ports | LLM Required | Use Case | +|------|-------------|-------|--------------|----------| +| **Minimal** | Code Graph only | 7474, 7687, 8000, 8080 | โŒ No | Static code analysis, repository exploration | +| **Standard** | Code Graph + Memory Store | 7474, 7687, 8000, 8080 | Embedding only | Project knowledge tracking, AI agent memory | +| **Full** | All Features + Knowledge RAG | 7474, 7687, 8000, 8080 | LLM + Embedding | Complete intelligent knowledge management | + +!!! info "What's Running?" + All modes start **two servers**: + + - **Port 8000**: MCP SSE Service (for AI assistants) + - **Port 8080**: Web UI + REST API (for humans & programs) + + See [Architecture Overview](../architecture/overview.md) to understand how these work together. + +## ๐Ÿš€ Choose Your Path === "Minimal (Recommended)" **Code Graph only** - No LLM required @@ -74,6 +92,54 @@ You should see: - โœ… API running at http://localhost:8000 - โœ… API docs at http://localhost:8000/docs +## ๐Ÿ“ก Understanding the Interfaces + +After starting the services, you have **three ways** to interact with the system: + +### 1. REST API (Port 8080) + +**For**: Programmatic access, scripts, CI/CD integration + +```bash +# Health check +curl http://localhost:8080/api/v1/health + +# Query knowledge +curl -X POST http://localhost:8080/api/v1/knowledge/query \ + -H "Content-Type: application/json" \ + -d '{"question": "How does authentication work?"}' +``` + +**Use cases**: +- Automation scripts +- CI/CD pipelines +- Custom applications +- Testing and monitoring + +[Full REST API Documentation](../api/rest.md) + +### 2. 
Web UI (Port 8080) + +**For**: Human users, visual monitoring + +Open in browser: http://localhost:8080 + +Features: +- ๐Ÿ“Š Task monitoring dashboard +- ๐Ÿ“ File and directory upload +- ๐Ÿ“ˆ System health and statistics +- โš™๏ธ Configuration management + +### 3. MCP Protocol (Port 8000) + +**For**: AI assistants (Claude Desktop, Cursor, etc.) + +Configure your AI tool to connect via MCP. The system provides 25+ tools for code intelligence. + +[MCP Integration Guide](../guide/mcp/overview.md) + +--- + ## ๐Ÿš€ First Steps ### 1. Access Neo4j Browser diff --git a/docs/guide/code-graph/ingestion.md b/docs/guide/code-graph/ingestion.md index 6f49482..db57474 100644 --- a/docs/guide/code-graph/ingestion.md +++ b/docs/guide/code-graph/ingestion.md @@ -863,8 +863,8 @@ ORDER BY count DESC For complex workflows, use the Python API directly: ```python -from services.graph_service import graph_service -from services.code_ingestor import CodeIngestor +from src.codebase_rag.services.code import graph_service +from src.codebase_rag.services.code import CodeIngestor # Initialize await graph_service.connect() diff --git a/docs/guide/code-graph/overview.md b/docs/guide/code-graph/overview.md index 1ace079..91bda4c 100644 --- a/docs/guide/code-graph/overview.md +++ b/docs/guide/code-graph/overview.md @@ -205,10 +205,10 @@ POST /api/v1/code-graph/context-pack - Build context pack For custom integrations, use Python services directly: ```python -from services.graph_service import graph_service -from services.code_ingestor import code_ingestor -from services.ranker import ranker -from services.pack_builder import pack_builder +from src.codebase_rag.services.code import graph_service +from src.codebase_rag.services.code import code_ingestor +from src.codebase_rag.services.ranker import ranker +from src.codebase_rag.services.code import pack_builder ``` ## Deployment Modes diff --git a/docs/guide/mcp/claude-desktop.md b/docs/guide/mcp/claude-desktop.md index f5c687c..cbadfd7 100644 
--- a/docs/guide/mcp/claude-desktop.md +++ b/docs/guide/mcp/claude-desktop.md @@ -33,7 +33,7 @@ You need a running instance: docker-compose -f docker/docker-compose.full.yml up -d # Option 2: Local development -python start_mcp.py +python -m codebase_rag --mcp # Verify it's running ps aux | grep start_mcp.py @@ -288,7 +288,7 @@ If tools don't appear: tail -f /path/to/codebase-rag/mcp_server.log # Enable debug mode -MCP_LOG_LEVEL=DEBUG python start_mcp.py +MCP_LOG_LEVEL=DEBUG python -m codebase_rag --mcp ``` **Claude Desktop Logs**: @@ -544,7 +544,7 @@ After tool calls: ```bash # Test the command manually cd /path/to/codebase-rag - python start_mcp.py + python -m codebase_rag --mcp ``` 4. **Review MCP server logs**: diff --git a/docs/guide/mcp/overview.md b/docs/guide/mcp/overview.md index 74aa91c..37a27b4 100644 --- a/docs/guide/mcp/overview.md +++ b/docs/guide/mcp/overview.md @@ -370,13 +370,13 @@ ENABLE_MEMORY_STORE=true ```bash # Direct execution -python start_mcp.py +python -m codebase_rag --mcp # Using uv uv run mcp_server # With custom config -MCP_LOG_LEVEL=DEBUG python start_mcp.py +MCP_LOG_LEVEL=DEBUG python -m codebase_rag --mcp ``` ### Client Configuration @@ -575,7 +575,7 @@ REQUEST_TIMEOUT=30 # seconds tail -f mcp_server.log # Enable debug logging -MCP_LOG_LEVEL=DEBUG python start_mcp.py +MCP_LOG_LEVEL=DEBUG python -m codebase_rag --mcp ``` ### Tool Call Tracing diff --git a/docs/guide/mcp/vscode.md b/docs/guide/mcp/vscode.md index 7651a43..22e67fd 100644 --- a/docs/guide/mcp/vscode.md +++ b/docs/guide/mcp/vscode.md @@ -50,7 +50,7 @@ Ensure the MCP server is accessible: ```bash # Running locally cd /path/to/codebase-rag -python start_mcp.py +python -m codebase_rag --mcp # Or via Docker docker-compose -f docker/docker-compose.full.yml up -d @@ -186,7 +186,7 @@ uv pip install -e . 
"command": "ssh", "args": [ "user@remote-server", - "cd /path/to/codebase-rag && python start_mcp.py" + "cd /path/to/codebase-rag && python -m codebase_rag --mcp" ] } } @@ -666,7 +666,7 @@ For multiple projects, use workspace folders: 2. **Verify command works**: ```bash cd /path/to/codebase-rag - python start_mcp.py + python -m codebase_rag --mcp # Should not exit immediately ``` diff --git a/docs/guide/memory/extraction.md b/docs/guide/memory/extraction.md index 289d6e1..9d27520 100644 --- a/docs/guide/memory/extraction.md +++ b/docs/guide/memory/extraction.md @@ -106,7 +106,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/conversation \ **Python Service**: ```python -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.memory import memory_extractor result = await memory_extractor.extract_from_conversation( project_id="my-project", @@ -284,7 +284,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/commit \ **Python Service**: ```python -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.memory import memory_extractor result = await memory_extractor.extract_from_git_commit( project_id="my-project", @@ -457,7 +457,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/comments \ **Python Service**: ```python -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.memory import memory_extractor result = await memory_extractor.extract_from_code_comments( project_id="my-project", @@ -637,7 +637,7 @@ curl -X POST http://localhost:8000/api/v1/memory/suggest \ **Python Service**: ```python -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.memory import memory_extractor result = await memory_extractor.suggest_memory_from_query( project_id="my-project", @@ -689,8 +689,8 @@ if result['should_save']: ### Integration with Knowledge Service ```python -from services.neo4j_knowledge_service import 
knowledge_service -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.knowledge import knowledge_service +from src.codebase_rag.services.memory import memory_extractor async def query_with_memory_suggestion( project_id: str, @@ -768,7 +768,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/batch \ **Python Service**: ```python -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.memory import memory_extractor result = await memory_extractor.batch_extract_from_repository( project_id="my-project", @@ -951,7 +951,7 @@ import subprocess import sys sys.path.insert(0, '/path/to/project') -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.memory import memory_extractor async def main(): # Get commit details @@ -1035,7 +1035,7 @@ OPENAI_API_KEY=your-key Adjust auto-save threshold (default: 0.7): ```python -from services.memory_extractor import memory_extractor +from src.codebase_rag.services.memory import memory_extractor # Lower threshold (more auto-saves) memory_extractor.confidence_threshold = 0.6 @@ -1049,7 +1049,7 @@ memory_extractor.confidence_threshold = 0.8 Adjust processing limits: ```python -from services.memory_extractor import MemoryExtractor +from src.codebase_rag.services.memory import MemoryExtractor # Custom limits MemoryExtractor.MAX_COMMITS_TO_PROCESS = 30 diff --git a/docs/guide/memory/manual.md b/docs/guide/memory/manual.md index bf0ff63..3379198 100644 --- a/docs/guide/memory/manual.md +++ b/docs/guide/memory/manual.md @@ -69,7 +69,7 @@ curl -X POST http://localhost:8000/api/v1/memory/add \ **Python Service**: ```python -from services.memory_store import memory_store +from src.codebase_rag.services.memory import memory_store result = await memory_store.add_memory( project_id="my-project", diff --git a/docs/guide/memory/overview.md b/docs/guide/memory/overview.md index 4ca0268..45afc19 100644 --- a/docs/guide/memory/overview.md +++ 
b/docs/guide/memory/overview.md @@ -376,7 +376,7 @@ curl -X POST http://localhost:8000/api/v1/memory/search \ For Python applications: ```python -from services.memory_store import memory_store +from src.codebase_rag.services.memory import memory_store import asyncio async def main(): diff --git a/docs/guide/memory/search.md b/docs/guide/memory/search.md index 85c3f1a..ec25428 100644 --- a/docs/guide/memory/search.md +++ b/docs/guide/memory/search.md @@ -59,7 +59,7 @@ curl -X POST http://localhost:8000/api/v1/memory/search \ **Python Service**: ```python -from services.memory_store import memory_store +from src.codebase_rag.services.memory import memory_store result = await memory_store.search_memories( project_id="my-project", diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index e0df1b1..6762e13 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -51,7 +51,7 @@ curl http://localhost:8000/api/v1/health ```bash # Check all services -python start.py --check +python -m codebase_rag --check # Check logs tail -f logs/application.log @@ -778,10 +778,10 @@ Error: MCP server failed to start ```bash # Try starting manually -python start_mcp.py +python -m codebase_rag --mcp # Check logs -python start_mcp.py 2>&1 | tee mcp.log +python -m codebase_rag --mcp 2>&1 | tee mcp.log ``` **Solutions:** diff --git a/config/sky.yml b/examples/configs/sky.yml similarity index 100% rename from config/sky.yml rename to examples/configs/sky.yml diff --git a/mcp_tools/README.md b/mcp_tools/README.md deleted file mode 100644 index 48ba31b..0000000 --- a/mcp_tools/README.md +++ /dev/null @@ -1,141 +0,0 @@ -# MCP Tools - Modular Structure - -This directory contains the modularized MCP Server v2 implementation. The code has been split from a single 1454-line file into logical, maintainable modules. 
- -## Directory Structure - -``` -mcp_tools/ -โ”œโ”€โ”€ __init__.py # Package exports for all handlers and utilities -โ”œโ”€โ”€ tool_definitions.py # Tool definitions (495 lines) -โ”œโ”€โ”€ utils.py # Utility functions (140 lines) -โ”œโ”€โ”€ knowledge_handlers.py # Knowledge base handlers (135 lines) -โ”œโ”€โ”€ code_handlers.py # Code graph handlers (173 lines) -โ”œโ”€โ”€ memory_handlers.py # Memory store handlers (168 lines) -โ”œโ”€โ”€ task_handlers.py # Task management handlers (245 lines) -โ”œโ”€โ”€ system_handlers.py # System handlers (73 lines) -โ”œโ”€โ”€ resources.py # Resource handlers (84 lines) -โ””โ”€โ”€ prompts.py # Prompt handlers (91 lines) -``` - -## Module Descriptions - -### `__init__.py` -Central import point for the package. Exports all handlers, utilities, and definitions for use in the main server file. - -### `tool_definitions.py` -Contains the `get_tool_definitions()` function that returns all 25 tool definitions organized by category: -- Knowledge Base (5 tools) -- Code Graph (4 tools) -- Memory Store (7 tools) -- Task Management (6 tools) -- System (3 tools) - -### `utils.py` -Contains the `format_result()` function that formats handler results for display, with specialized formatting for: -- Query results with answers -- Search results -- Memory search results -- Code graph results -- Context packs -- Task lists -- Queue statistics - -### `knowledge_handlers.py` -Handlers for knowledge base operations: -- `handle_query_knowledge()` - Query using GraphRAG -- `handle_search_similar_nodes()` - Vector similarity search -- `handle_add_document()` - Add document (sync/async based on size) -- `handle_add_file()` - Add single file -- `handle_add_directory()` - Add directory (async) - -### `code_handlers.py` -Handlers for code graph operations: -- `handle_code_graph_ingest_repo()` - Ingest repository (full/incremental) -- `handle_code_graph_related()` - Find related files -- `handle_code_graph_impact()` - Analyze impact/dependencies -- 
`handle_context_pack()` - Build context pack for AI agents - -### `memory_handlers.py` -Handlers for memory store operations: -- `handle_add_memory()` - Add new memory -- `handle_search_memories()` - Search with filters -- `handle_get_memory()` - Get by ID -- `handle_update_memory()` - Update existing -- `handle_delete_memory()` - Soft delete -- `handle_supersede_memory()` - Replace with history -- `handle_get_project_summary()` - Project overview - -### `task_handlers.py` -Handlers for task queue operations: -- `handle_get_task_status()` - Get single task status -- `handle_watch_task()` - Monitor task until completion -- `handle_watch_tasks()` - Monitor multiple tasks -- `handle_list_tasks()` - List with filters -- `handle_cancel_task()` - Cancel task -- `handle_get_queue_stats()` - Queue statistics - -### `system_handlers.py` -Handlers for system operations: -- `handle_get_graph_schema()` - Get Neo4j schema -- `handle_get_statistics()` - Get KB statistics -- `handle_clear_knowledge_base()` - Clear all data (dangerous) - -### `resources.py` -MCP resource handlers: -- `get_resource_list()` - List available resources -- `read_resource_content()` - Read resource content (config, status) - -### `prompts.py` -MCP prompt handlers: -- `get_prompt_list()` - List available prompts -- `get_prompt_content()` - Get prompt content (suggest_queries) - -## Service Injection Pattern - -All handlers use dependency injection for services. 
Services are passed as parameters from the main server file: - -```python -# Example from knowledge_handlers.py -async def handle_query_knowledge(args: Dict, knowledge_service) -> Dict: - result = await knowledge_service.query( - question=args["question"], - mode=args.get("mode", "hybrid") - ) - return result - -# Called from mcp_server_v2.py -result = await handle_query_knowledge(arguments, knowledge_service) -``` - -This pattern: -- Keeps handlers testable (easy to mock services) -- Makes dependencies explicit -- Allows handlers to be pure functions -- Enables better code organization - -## Main Server File - -The main `mcp_server_v2.py` (310 lines) is now much cleaner: -- Imports all handlers from `mcp_tools` -- Initializes services -- Routes tool calls to appropriate handlers -- Handles resources and prompts - -## Benefits of Modularization - -1. **Maintainability**: Each module has a single responsibility -2. **Readability**: Easier to find and understand code -3. **Testability**: Modules can be tested independently -4. **Scalability**: Easy to add new handlers without cluttering main file -5. **Reusability**: Handlers can potentially be reused in other contexts - -## Usage - -The modularization is transparent to users. The server is used exactly the same way: - -```bash -python start_mcp_v2.py -``` - -All tools, resources, and prompts work identically to the previous implementation. diff --git a/mcp_tools/__init__.py b/mcp_tools/__init__.py deleted file mode 100644 index a47defd..0000000 --- a/mcp_tools/__init__.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -MCP Tools Package - -This package contains modularized handlers for MCP Server v2. -All tool handlers, utilities, and definitions are organized into logical modules. 
-""" - -# Knowledge base handlers -from .knowledge_handlers import ( - handle_query_knowledge, - handle_search_similar_nodes, - handle_add_document, - handle_add_file, - handle_add_directory, -) - -# Code graph handlers -from .code_handlers import ( - handle_code_graph_ingest_repo, - handle_code_graph_related, - handle_code_graph_impact, - handle_context_pack, -) - -# Memory store handlers -from .memory_handlers import ( - handle_add_memory, - handle_search_memories, - handle_get_memory, - handle_update_memory, - handle_delete_memory, - handle_supersede_memory, - handle_get_project_summary, - # v0.7 Automatic extraction - handle_extract_from_conversation, - handle_extract_from_git_commit, - handle_extract_from_code_comments, - handle_suggest_memory_from_query, - handle_batch_extract_from_repository, -) - -# Task management handlers -from .task_handlers import ( - handle_get_task_status, - handle_watch_task, - handle_watch_tasks, - handle_list_tasks, - handle_cancel_task, - handle_get_queue_stats, -) - -# System handlers -from .system_handlers import ( - handle_get_graph_schema, - handle_get_statistics, - handle_clear_knowledge_base, -) - -# Tool definitions -from .tool_definitions import get_tool_definitions - -# Utilities -from .utils import format_result - -# Resources -from .resources import get_resource_list, read_resource_content - -# Prompts -from .prompts import get_prompt_list, get_prompt_content - - -__all__ = [ - # Knowledge handlers - "handle_query_knowledge", - "handle_search_similar_nodes", - "handle_add_document", - "handle_add_file", - "handle_add_directory", - # Code handlers - "handle_code_graph_ingest_repo", - "handle_code_graph_related", - "handle_code_graph_impact", - "handle_context_pack", - # Memory handlers - "handle_add_memory", - "handle_search_memories", - "handle_get_memory", - "handle_update_memory", - "handle_delete_memory", - "handle_supersede_memory", - "handle_get_project_summary", - # v0.7 Extraction handlers - 
"handle_extract_from_conversation", - "handle_extract_from_git_commit", - "handle_extract_from_code_comments", - "handle_suggest_memory_from_query", - "handle_batch_extract_from_repository", - # Task handlers - "handle_get_task_status", - "handle_watch_task", - "handle_watch_tasks", - "handle_list_tasks", - "handle_cancel_task", - "handle_get_queue_stats", - # System handlers - "handle_get_graph_schema", - "handle_get_statistics", - "handle_clear_knowledge_base", - # Tool definitions - "get_tool_definitions", - # Utilities - "format_result", - # Resources - "get_resource_list", - "read_resource_content", - # Prompts - "get_prompt_list", - "get_prompt_content", -] diff --git a/mkdocs.yml b/mkdocs.yml index 9bc4a4e..b88e061 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,10 +1,12 @@ site_name: Code Graph Knowledge System -site_url: https://code-graph.vantagecraft.dev +site_url: https://vantagecraft.dev/docs/code-graph/ site_description: AI-powered code intelligence and knowledge management system site_author: VantageCraft repo_url: https://github.com/royisme/codebase-rag repo_name: codebase-rag edit_uri: edit/main/docs/ +use_directory_urls: true + theme: name: material @@ -170,6 +172,7 @@ nav: - MCP Tools: api/mcp-tools.md - Python SDK: api/python-sdk.md - Architecture: + - Overview: architecture/overview.md - System Design: architecture/design.md - Components: architecture/components.md - Data Flow: architecture/dataflow.md @@ -177,6 +180,7 @@ nav: - Contributing: development/contributing.md - Development Setup: development/setup.md - Testing: development/testing.md + - Migration Guide (v0.8.0): development/migration-guide.md - Version Management: development/version-management.md - Changelog Automation: development/changelog-automation.md - Release Process: development/release.md diff --git a/pyproject.toml b/pyproject.toml index 75a1d1a..f41e2e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,12 +50,15 @@ dev = [ ] [project.scripts] -server = "start:main" 
-mcp_client = "start_mcp:main" +codebase-rag = "codebase_rag.__main__:main" +codebase-rag-web = "codebase_rag.server.web:main" +codebase-rag-mcp = "codebase_rag.server.mcp:main" [tool.setuptools] -packages = ["api", "core", "services", "mcp_tools"] -py-modules = ["start", "start_mcp", "mcp_server", "config", "main"] +packages = {find = {where = ["src"]}} + +[tool.setuptools.package-data] +codebase_rag = ["py.typed"] [tool.pytest.ini_options] minversion = "6.0" @@ -73,7 +76,7 @@ asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "function" [tool.coverage.run] -source = ["mcp_tools", "services", "api", "core"] +source = ["src/codebase_rag"] omit = [ "*/tests/*", "*/test_*.py", diff --git a/build-frontend.sh b/scripts/build-frontend.sh similarity index 98% rename from build-frontend.sh rename to scripts/build-frontend.sh index 42e3f44..043ab2f 100755 --- a/build-frontend.sh +++ b/scripts/build-frontend.sh @@ -8,7 +8,7 @@ # to the Docker image's /app/static directory. # # Usage: -# ./build-frontend.sh [--clean] +# ./scripts/build-frontend.sh [--clean] # # Options: # --clean Clean node_modules and dist before building diff --git a/scripts/bump-version.sh b/scripts/bump-version.sh index f8453c4..491d7c0 100755 --- a/scripts/bump-version.sh +++ b/scripts/bump-version.sh @@ -81,11 +81,11 @@ if [[ -z "$DRY_RUN" ]]; then echo -e "${YELLOW}This will:${NC}" if [[ "$GENERATE_CHANGELOG" == true ]]; then echo " 1. Generate changelog from git commits" - echo " 2. Update version in pyproject.toml, src/__version__.py" + echo " 2. Update version in pyproject.toml, src/codebase_rag/__version__.py" echo " 3. Create a git commit" echo " 4. Create a git tag v$NEW_VERSION" else - echo " 1. Update version in pyproject.toml, src/__version__.py" + echo " 1. Update version in pyproject.toml, src/codebase_rag/__version__.py" echo " 2. Create a git commit" echo " 3. 
Create a git tag v$NEW_VERSION"
 fi
diff --git a/docker-start.sh b/scripts/docker-start.sh
similarity index 97%
rename from docker-start.sh
rename to scripts/docker-start.sh
index 0930b59..24560d5 100755
--- a/docker-start.sh
+++ b/scripts/docker-start.sh
@@ -148,5 +148,5 @@ echo -e "${YELLOW}Useful commands:${NC}"
 echo -e "  View logs:          docker compose logs -f"
 echo -e "  Stop services:      docker compose down"
 echo -e "  Restart:            docker compose restart"
-echo -e "  Bootstrap Neo4j:    docker compose exec app python -c 'from services.graph_service import graph_service; graph_service._setup_schema()'"
+echo -e "  Bootstrap Neo4j:    docker compose exec app python -c 'from codebase_rag.services.code import graph_service; graph_service._setup_schema()'"
 echo ""
diff --git a/docker-stop.sh b/scripts/docker-stop.sh
similarity index 100%
rename from docker-stop.sh
rename to scripts/docker-stop.sh
diff --git a/scripts/neo4j_bootstrap.sh b/scripts/neo4j_bootstrap.sh
index 9760ef9..64862a2 100755
--- a/scripts/neo4j_bootstrap.sh
+++ b/scripts/neo4j_bootstrap.sh
@@ -17,7 +17,7 @@ NC='\033[0m' # No Color
 
 # Script directory
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.."
&& pwd)" -SCHEMA_FILE="$PROJECT_ROOT/services/graph/schema.cypher" +SCHEMA_FILE="$PROJECT_ROOT/src/codebase_rag/services/graph/schema.cypher" echo -e "${GREEN}========================================${NC}" echo -e "${GREEN}Neo4j Schema Bootstrap${NC}" @@ -152,7 +152,7 @@ if __name__ == "__main__": user = os.getenv("NEO4J_USER", "neo4j") password = os.getenv("NEO4J_PASSWORD", "password") database = os.getenv("NEO4J_DATABASE", "neo4j") - schema_file = sys.argv[1] if len(sys.argv) > 1 else "services/graph/schema.cypher" + schema_file = sys.argv[1] if len(sys.argv) > 1 else "src/codebase_rag/services/graph/schema.cypher" print(f"Connecting to {uri} as {user}...") apply_schema(uri, user, password, database, schema_file) diff --git a/services/__init__.py b/services/__init__.py deleted file mode 100644 index 3a86e8d..0000000 --- a/services/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Services module initialization \ No newline at end of file diff --git a/src/codebase_rag/__init__.py b/src/codebase_rag/__init__.py new file mode 100644 index 0000000..115c339 --- /dev/null +++ b/src/codebase_rag/__init__.py @@ -0,0 +1,26 @@ +""" +Codebase RAG - Code Knowledge Graph and RAG System. + +A comprehensive system for code analysis, knowledge extraction, and RAG-based querying. +Supports MCP protocol for AI assistant integration. +""" + +from codebase_rag.__version__ import ( + __version__, + __version_info__, + get_version, + get_version_info, + get_features, + FEATURES, + DEPLOYMENT_MODES, +) + +__all__ = [ + "__version__", + "__version_info__", + "get_version", + "get_version_info", + "get_features", + "FEATURES", + "DEPLOYMENT_MODES", +] diff --git a/src/codebase_rag/__main__.py b/src/codebase_rag/__main__.py new file mode 100644 index 0000000..042137a --- /dev/null +++ b/src/codebase_rag/__main__.py @@ -0,0 +1,56 @@ +""" +Main entry point for codebase-rag package. 
+ +Usage: + python -m codebase_rag [--web|--mcp|--version] +""" + +import sys +import argparse + + +def main(): + """Main entry point for the package.""" + parser = argparse.ArgumentParser( + description="Codebase RAG - Code Knowledge Graph and RAG System" + ) + parser.add_argument( + "--version", + action="store_true", + help="Show version information", + ) + parser.add_argument( + "--web", + action="store_true", + help="Start web server (FastAPI)", + ) + parser.add_argument( + "--mcp", + action="store_true", + help="Start MCP server", + ) + + args = parser.parse_args() + + if args.version: + from codebase_rag import __version__ + print(f"codebase-rag version {__version__}") + return 0 + + if args.mcp: + # Run MCP server + print("Starting MCP server...") + from codebase_rag.server.mcp import main as mcp_main + return mcp_main() + + if args.web or not any([args.web, args.mcp, args.version]): + # Default: start web server + print("Starting web server...") + from codebase_rag.server.web import main as web_main + return web_main() + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/__version__.py b/src/codebase_rag/__version__.py similarity index 100% rename from src/__version__.py rename to src/codebase_rag/__version__.py diff --git a/api/__init__.py b/src/codebase_rag/api/__init__.py similarity index 100% rename from api/__init__.py rename to src/codebase_rag/api/__init__.py diff --git a/api/memory_routes.py b/src/codebase_rag/api/memory_routes.py similarity index 99% rename from api/memory_routes.py rename to src/codebase_rag/api/memory_routes.py index 0445b68..ae779c4 100644 --- a/api/memory_routes.py +++ b/src/codebase_rag/api/memory_routes.py @@ -11,8 +11,7 @@ from pydantic import BaseModel, Field from typing import Optional, List, Dict, Any, Literal -from services.memory_store import memory_store -from services.memory_extractor import memory_extractor +from codebase_rag.services.memory import memory_store, memory_extractor from 
loguru import logger diff --git a/api/neo4j_routes.py b/src/codebase_rag/api/neo4j_routes.py similarity index 98% rename from api/neo4j_routes.py rename to src/codebase_rag/api/neo4j_routes.py index dfd011c..326c5d4 100644 --- a/api/neo4j_routes.py +++ b/src/codebase_rag/api/neo4j_routes.py @@ -8,7 +8,7 @@ import tempfile import os -from services.neo4j_knowledge_service import neo4j_knowledge_service +from codebase_rag.services.knowledge import neo4j_knowledge_service router = APIRouter(prefix="/neo4j-knowledge", tags=["Neo4j Knowledge Graph"]) diff --git a/api/routes.py b/src/codebase_rag/api/routes.py similarity index 98% rename from api/routes.py rename to src/codebase_rag/api/routes.py index 072acd7..2bdc710 100644 --- a/api/routes.py +++ b/src/codebase_rag/api/routes.py @@ -5,17 +5,12 @@ import uuid from datetime import datetime -from services.sql_parser import sql_analyzer -from services.graph_service import graph_service -from services.neo4j_knowledge_service import Neo4jKnowledgeService -from services.universal_sql_schema_parser import parse_sql_schema_smart -from services.task_queue import task_queue -from services.code_ingestor import get_code_ingestor -from services.git_utils import git_utils -from services.ranker import ranker -from services.pack_builder import pack_builder -from services.metrics import metrics_service -from config import settings +from codebase_rag.services.sql import sql_analyzer, parse_sql_schema_smart +from codebase_rag.services.code import graph_service, get_code_ingestor, pack_builder +from codebase_rag.services.knowledge import Neo4jKnowledgeService +from codebase_rag.services.tasks import task_queue +from codebase_rag.services.utils import git_utils, ranker, metrics_service +from codebase_rag.config import settings from loguru import logger # create router diff --git a/api/sse_routes.py b/src/codebase_rag/api/sse_routes.py similarity index 99% rename from api/sse_routes.py rename to src/codebase_rag/api/sse_routes.py index 
9e123ad..84c1921 100644 --- a/api/sse_routes.py +++ b/src/codebase_rag/api/sse_routes.py @@ -9,7 +9,7 @@ from fastapi.responses import StreamingResponse from loguru import logger -from services.task_queue import task_queue, TaskStatus +from codebase_rag.services.tasks import task_queue, TaskStatus router = APIRouter(prefix="/sse", tags=["SSE"]) diff --git a/api/task_routes.py b/src/codebase_rag/api/task_routes.py similarity index 99% rename from api/task_routes.py rename to src/codebase_rag/api/task_routes.py index 9956272..1e86e6a 100644 --- a/api/task_routes.py +++ b/src/codebase_rag/api/task_routes.py @@ -9,10 +9,9 @@ from pydantic import BaseModel from datetime import datetime -from services.task_queue import task_queue, TaskStatus -from services.task_storage import TaskType +from codebase_rag.services.tasks import task_queue, TaskStatus, TaskType from loguru import logger -from config import settings +from codebase_rag.config import settings router = APIRouter(prefix="/tasks", tags=["Task Management"]) diff --git a/api/websocket_routes.py b/src/codebase_rag/api/websocket_routes.py similarity index 99% rename from api/websocket_routes.py rename to src/codebase_rag/api/websocket_routes.py index 9531d47..94a80bd 100644 --- a/api/websocket_routes.py +++ b/src/codebase_rag/api/websocket_routes.py @@ -9,7 +9,7 @@ import json from loguru import logger -from services.task_queue import task_queue +from codebase_rag.services.tasks import task_queue router = APIRouter() diff --git a/src/codebase_rag/config/__init__.py b/src/codebase_rag/config/__init__.py new file mode 100644 index 0000000..188a239 --- /dev/null +++ b/src/codebase_rag/config/__init__.py @@ -0,0 +1,28 @@ +""" +Configuration module for Codebase RAG. + +This module exports all configuration-related objects and functions. 
+""" + +from codebase_rag.config.settings import Settings, settings +from codebase_rag.config.validation import ( + validate_neo4j_connection, + validate_ollama_connection, + validate_openai_connection, + validate_gemini_connection, + validate_openrouter_connection, + get_current_model_info, +) + +__all__ = [ + # Settings + "Settings", + "settings", + # Validation functions + "validate_neo4j_connection", + "validate_ollama_connection", + "validate_openai_connection", + "validate_gemini_connection", + "validate_openrouter_connection", + "get_current_model_info", +] diff --git a/config.py b/src/codebase_rag/config/settings.py similarity index 58% rename from config.py rename to src/codebase_rag/config/settings.py index b1625b8..ab9cf0f 100644 --- a/config.py +++ b/src/codebase_rag/config/settings.py @@ -1,7 +1,15 @@ +""" +Configuration settings for Codebase RAG. + +This module defines all application settings using Pydantic Settings. +Settings can be configured via environment variables or .env file. 
+""" + from pydantic_settings import BaseSettings from pydantic import Field from typing import Optional, Literal + class Settings(BaseSettings): # Application Settings app_name: str = "Code Graph Knowledge Service" @@ -19,197 +27,92 @@ class Settings(BaseSettings): # Vector Search Settings (using Neo4j built-in vector index) vector_index_name: str = Field(default="knowledge_vectors", description="Neo4j vector index name") vector_dimension: int = Field(default=384, description="Vector embedding dimension") - + # Neo4j Graph Database neo4j_uri: str = Field(default="bolt://localhost:7687", description="Neo4j connection URI", alias="NEO4J_URI") neo4j_username: str = Field(default="neo4j", description="Neo4j username", alias="NEO4J_USER") neo4j_password: str = Field(default="password", description="Neo4j password", alias="NEO4J_PASSWORD") neo4j_database: str = Field(default="neo4j", description="Neo4j database name") - + # LLM Provider Configuration llm_provider: Literal["ollama", "openai", "gemini", "openrouter"] = Field( - default="ollama", - description="LLM provider to use", + default="ollama", + description="LLM provider to use", alias="LLM_PROVIDER" ) - + # Ollama LLM Service ollama_base_url: str = Field(default="http://localhost:11434", description="Ollama service URL", alias="OLLAMA_HOST") ollama_model: str = Field(default="llama2", description="Ollama model name", alias="OLLAMA_MODEL") - + # OpenAI Configuration openai_api_key: Optional[str] = Field(default=None, description="OpenAI API key", alias="OPENAI_API_KEY") openai_model: str = Field(default="gpt-3.5-turbo", description="OpenAI model name", alias="OPENAI_MODEL") openai_base_url: Optional[str] = Field(default=None, description="OpenAI API base URL", alias="OPENAI_BASE_URL") - + # Google Gemini Configuration google_api_key: Optional[str] = Field(default=None, description="Google API key", alias="GOOGLE_API_KEY") gemini_model: str = Field(default="gemini-pro", description="Gemini model name", 
alias="GEMINI_MODEL") - + # OpenRouter Configuration openrouter_api_key: Optional[str] = Field(default=None, description="OpenRouter API key", alias="OPENROUTER_API_KEY") openrouter_base_url: str = Field(default="https://openrouter.ai/api/v1", description="OpenRouter API base URL", alias="OPENROUTER_BASE_URL") openrouter_model: Optional[str] = Field(default="openai/gpt-3.5-turbo", description="OpenRouter model", alias="OPENROUTER_MODEL") openrouter_max_tokens: int = Field(default=2048, description="OpenRouter max tokens for completion", alias="OPENROUTER_MAX_TOKENS") - + # Embedding Provider Configuration embedding_provider: Literal["ollama", "openai", "gemini", "huggingface", "openrouter"] = Field( - default="ollama", - description="Embedding provider to use", + default="ollama", + description="Embedding provider to use", alias="EMBEDDING_PROVIDER" ) - + # Ollama Embedding ollama_embedding_model: str = Field(default="nomic-embed-text", description="Ollama embedding model", alias="OLLAMA_EMBEDDING_MODEL") - + # OpenAI Embedding openai_embedding_model: str = Field(default="text-embedding-ada-002", description="OpenAI embedding model", alias="OPENAI_EMBEDDING_MODEL") - + # Gemini Embedding gemini_embedding_model: str = Field(default="models/embedding-001", description="Gemini embedding model", alias="GEMINI_EMBEDDING_MODEL") - + # HuggingFace Embedding huggingface_embedding_model: str = Field(default="BAAI/bge-small-en-v1.5", description="HuggingFace embedding model", alias="HF_EMBEDDING_MODEL") - + # OpenRouter Embedding openrouter_embedding_model: str = Field(default="text-embedding-ada-002", description="OpenRouter embedding model", alias="OPENROUTER_EMBEDDING_MODEL") - + # Model Parameters temperature: float = Field(default=0.1, description="LLM temperature") max_tokens: int = Field(default=2048, description="Maximum tokens for LLM response") - + # RAG Settings chunk_size: int = Field(default=512, description="Text chunk size for processing") chunk_overlap: int = 
Field(default=50, description="Chunk overlap size") top_k: int = Field(default=5, description="Top K results for retrieval") - + # Timeout Settings connection_timeout: int = Field(default=30, description="Connection timeout in seconds") operation_timeout: int = Field(default=120, description="Operation timeout in seconds") large_document_timeout: int = Field(default=300, description="Large document processing timeout in seconds") - + # Document Processing Settings max_document_size: int = Field(default=10 * 1024 * 1024, description="Maximum document size in bytes (10MB)") max_payload_size: int = Field(default=50 * 1024 * 1024, description="Maximum task payload size for storage (50MB)") - + # API Settings cors_origins: list = Field(default=["*"], description="CORS allowed origins") api_key: Optional[str] = Field(default=None, description="API authentication key") - + # logging log_file: Optional[str] = Field(default="app.log", description="Log file path") log_level: str = Field(default="INFO", description="Log level") - + class Config: env_file = ".env" env_file_encoding = "utf-8" - extra = "ignore" # ๅฟฝ็•ฅ้ขๅค–็š„ๅญ—ๆฎต๏ผŒ้ฟๅ…้ชŒ่ฏ้”™่ฏฏ + extra = "ignore" # Ignore extra fields to avoid validation errors + # Global settings instance settings = Settings() - -# Validation functions - -def validate_neo4j_connection(): - """Validate Neo4j connection parameters""" - try: - from neo4j import GraphDatabase - driver = GraphDatabase.driver( - settings.neo4j_uri, - auth=(settings.neo4j_username, settings.neo4j_password) - ) - with driver.session() as session: - session.run("RETURN 1") - driver.close() - return True - except Exception as e: - print(f"Neo4j connection failed: {e}") - return False - -def validate_ollama_connection(): - """Validate Ollama service connection""" - try: - import httpx - response = httpx.get(f"{settings.ollama_base_url}/api/tags") - return response.status_code == 200 - except Exception as e: - print(f"Ollama connection failed: {e}") - return 
False - -def validate_openai_connection(): - """Validate OpenAI API connection""" - if not settings.openai_api_key: - print("OpenAI API key not provided") - return False - try: - import openai - client = openai.OpenAI( - api_key=settings.openai_api_key, - base_url=settings.openai_base_url - ) - # Test with a simple completion - response = client.chat.completions.create( - model=settings.openai_model, - messages=[{"role": "user", "content": "test"}], - max_tokens=1 - ) - return True - except Exception as e: - print(f"OpenAI connection failed: {e}") - return False - -def validate_gemini_connection(): - """Validate Google Gemini API connection""" - if not settings.google_api_key: - print("Google API key not provided") - return False - try: - import google.generativeai as genai - genai.configure(api_key=settings.google_api_key) - model = genai.GenerativeModel(settings.gemini_model) - # Test with a simple generation - response = model.generate_content("test") - return True - except Exception as e: - print(f"Gemini connection failed: {e}") - return False - -def validate_openrouter_connection(): - """Validate OpenRouter API connection""" - if not settings.openrouter_api_key: - print("OpenRouter API key not provided") - return False - try: - import httpx - # We'll use the models endpoint to check the connection - headers = { - "Authorization": f"Bearer {settings.openrouter_api_key}", - # OpenRouter requires these headers for identification - "HTTP-Referer": "CodeGraphKnowledgeService", - "X-Title": "CodeGraph Knowledge Service" - } - response = httpx.get("https://openrouter.ai/api/v1/models", headers=headers) - return response.status_code == 200 - except Exception as e: - print(f"OpenRouter connection failed: {e}") - return False - -def get_current_model_info(): - """Get information about currently configured models""" - return { - "llm_provider": settings.llm_provider, - "llm_model": { - "ollama": settings.ollama_model, - "openai": settings.openai_model, - "gemini": 
settings.gemini_model, - "openrouter": settings.openrouter_model - }.get(settings.llm_provider), - "embedding_provider": settings.embedding_provider, - "embedding_model": { - "ollama": settings.ollama_embedding_model, - "openai": settings.openai_embedding_model, - "gemini": settings.gemini_embedding_model, - "huggingface": settings.huggingface_embedding_model, - "openrouter": settings.openrouter_embedding_model - }.get(settings.embedding_provider) - } diff --git a/src/codebase_rag/config/validation.py b/src/codebase_rag/config/validation.py new file mode 100644 index 0000000..087bec1 --- /dev/null +++ b/src/codebase_rag/config/validation.py @@ -0,0 +1,118 @@ +""" +Validation functions for configuration settings. + +This module provides functions to validate connections to various services +like Neo4j, Ollama, OpenAI, Gemini, and OpenRouter. +""" + +from codebase_rag.config.settings import settings + + +def validate_neo4j_connection() -> bool: + """Validate Neo4j connection parameters""" + try: + from neo4j import GraphDatabase + driver = GraphDatabase.driver( + settings.neo4j_uri, + auth=(settings.neo4j_username, settings.neo4j_password) + ) + with driver.session() as session: + session.run("RETURN 1") + driver.close() + return True + except Exception as e: + print(f"Neo4j connection failed: {e}") + return False + + +def validate_ollama_connection() -> bool: + """Validate Ollama service connection""" + try: + import httpx + response = httpx.get(f"{settings.ollama_base_url}/api/tags") + return response.status_code == 200 + except Exception as e: + print(f"Ollama connection failed: {e}") + return False + + +def validate_openai_connection() -> bool: + """Validate OpenAI API connection""" + if not settings.openai_api_key: + print("OpenAI API key not provided") + return False + try: + import openai + client = openai.OpenAI( + api_key=settings.openai_api_key, + base_url=settings.openai_base_url + ) + # Test with a simple completion + response = 
client.chat.completions.create( + model=settings.openai_model, + messages=[{"role": "user", "content": "test"}], + max_tokens=1 + ) + return True + except Exception as e: + print(f"OpenAI connection failed: {e}") + return False + + +def validate_gemini_connection() -> bool: + """Validate Google Gemini API connection""" + if not settings.google_api_key: + print("Google API key not provided") + return False + try: + import google.generativeai as genai + genai.configure(api_key=settings.google_api_key) + model = genai.GenerativeModel(settings.gemini_model) + # Test with a simple generation + response = model.generate_content("test") + return True + except Exception as e: + print(f"Gemini connection failed: {e}") + return False + + +def validate_openrouter_connection() -> bool: + """Validate OpenRouter API connection""" + if not settings.openrouter_api_key: + print("OpenRouter API key not provided") + return False + try: + import httpx + # We'll use the models endpoint to check the connection + headers = { + "Authorization": f"Bearer {settings.openrouter_api_key}", + # OpenRouter requires these headers for identification + "HTTP-Referer": "CodeGraphKnowledgeService", + "X-Title": "CodeGraph Knowledge Service" + } + response = httpx.get("https://openrouter.ai/api/v1/models", headers=headers) + return response.status_code == 200 + except Exception as e: + print(f"OpenRouter connection failed: {e}") + return False + + +def get_current_model_info() -> dict: + """Get information about currently configured models""" + return { + "llm_provider": settings.llm_provider, + "llm_model": { + "ollama": settings.ollama_model, + "openai": settings.openai_model, + "gemini": settings.gemini_model, + "openrouter": settings.openrouter_model + }.get(settings.llm_provider), + "embedding_provider": settings.embedding_provider, + "embedding_model": { + "ollama": settings.ollama_embedding_model, + "openai": settings.openai_embedding_model, + "gemini": settings.gemini_embedding_model, + 
"huggingface": settings.huggingface_embedding_model, + "openrouter": settings.openrouter_embedding_model + }.get(settings.embedding_provider) + } diff --git a/core/__init__.py b/src/codebase_rag/core/__init__.py similarity index 100% rename from core/__init__.py rename to src/codebase_rag/core/__init__.py diff --git a/core/app.py b/src/codebase_rag/core/app.py similarity index 99% rename from core/app.py rename to src/codebase_rag/core/app.py index 82475ac..2e4cc75 100644 --- a/core/app.py +++ b/src/codebase_rag/core/app.py @@ -15,7 +15,7 @@ from loguru import logger import os -from config import settings +from codebase_rag.config import settings from .exception_handlers import setup_exception_handlers from .middleware import setup_middleware from .routes import setup_routes diff --git a/core/exception_handlers.py b/src/codebase_rag/core/exception_handlers.py similarity index 96% rename from core/exception_handlers.py rename to src/codebase_rag/core/exception_handlers.py index 97aa766..80c4d67 100644 --- a/core/exception_handlers.py +++ b/src/codebase_rag/core/exception_handlers.py @@ -6,7 +6,7 @@ from fastapi.responses import JSONResponse from loguru import logger -from config import settings +from codebase_rag.config import settings def setup_exception_handlers(app: FastAPI) -> None: diff --git a/core/lifespan.py b/src/codebase_rag/core/lifespan.py similarity index 90% rename from core/lifespan.py rename to src/codebase_rag/core/lifespan.py index 0a35c49..cf81b1d 100644 --- a/core/lifespan.py +++ b/src/codebase_rag/core/lifespan.py @@ -6,10 +6,9 @@ from fastapi import FastAPI from loguru import logger -from services.neo4j_knowledge_service import neo4j_knowledge_service -from services.task_queue import task_queue -from services.task_processors import processor_registry -from services.memory_store import memory_store +from codebase_rag.services.knowledge import neo4j_knowledge_service +from codebase_rag.services.tasks import task_queue, processor_registry +from 
codebase_rag.services.memory import memory_store @asynccontextmanager diff --git a/core/logging.py b/src/codebase_rag/core/logging.py similarity index 96% rename from core/logging.py rename to src/codebase_rag/core/logging.py index 5725a9b..104a6e3 100644 --- a/core/logging.py +++ b/src/codebase_rag/core/logging.py @@ -5,7 +5,7 @@ import sys from loguru import logger -from config import settings +from codebase_rag.config import settings def setup_logging(): diff --git a/core/mcp_sse.py b/src/codebase_rag/core/mcp_sse.py similarity index 100% rename from core/mcp_sse.py rename to src/codebase_rag/core/mcp_sse.py diff --git a/core/middleware.py b/src/codebase_rag/core/middleware.py similarity index 93% rename from core/middleware.py rename to src/codebase_rag/core/middleware.py index 7c921e1..c6cc80d 100644 --- a/core/middleware.py +++ b/src/codebase_rag/core/middleware.py @@ -6,7 +6,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.gzip import GZipMiddleware -from config import settings +from codebase_rag.config import settings def setup_middleware(app: FastAPI) -> None: diff --git a/core/routes.py b/src/codebase_rag/core/routes.py similarity index 59% rename from core/routes.py rename to src/codebase_rag/core/routes.py index 6818e04..373c3f0 100644 --- a/core/routes.py +++ b/src/codebase_rag/core/routes.py @@ -4,12 +4,12 @@ from fastapi import FastAPI -from api.routes import router -from api.neo4j_routes import router as neo4j_router -from api.task_routes import router as task_router -from api.websocket_routes import router as ws_router -from api.sse_routes import router as sse_router -from api.memory_routes import router as memory_router +from codebase_rag.api.routes import router +from codebase_rag.api.neo4j_routes import router as neo4j_router +from codebase_rag.api.task_routes import router as task_router +from codebase_rag.api.websocket_routes import router as ws_router +from codebase_rag.api.sse_routes import router as sse_router 
+from codebase_rag.api.memory_routes import router as memory_router def setup_routes(app: FastAPI) -> None: diff --git a/src/codebase_rag/mcp/__init__.py b/src/codebase_rag/mcp/__init__.py new file mode 100644 index 0000000..55814f3 --- /dev/null +++ b/src/codebase_rag/mcp/__init__.py @@ -0,0 +1,9 @@ +""" +MCP (Model Context Protocol) implementation for Codebase RAG. + +This module provides the MCP server and handlers for AI assistant integration. +""" + +from codebase_rag.mcp import handlers, tools, resources, prompts, utils + +__all__ = ["handlers", "tools", "resources", "prompts", "utils"] diff --git a/src/codebase_rag/mcp/handlers/__init__.py b/src/codebase_rag/mcp/handlers/__init__.py new file mode 100644 index 0000000..914b688 --- /dev/null +++ b/src/codebase_rag/mcp/handlers/__init__.py @@ -0,0 +1,11 @@ +"""MCP request handlers.""" + +from codebase_rag.mcp.handlers import ( + knowledge, + code, + memory, + tasks, + system, +) + +__all__ = ["knowledge", "code", "memory", "tasks", "system"] diff --git a/mcp_tools/code_handlers.py b/src/codebase_rag/mcp/handlers/code.py similarity index 100% rename from mcp_tools/code_handlers.py rename to src/codebase_rag/mcp/handlers/code.py diff --git a/mcp_tools/knowledge_handlers.py b/src/codebase_rag/mcp/handlers/knowledge.py similarity index 100% rename from mcp_tools/knowledge_handlers.py rename to src/codebase_rag/mcp/handlers/knowledge.py diff --git a/mcp_tools/memory_handlers.py b/src/codebase_rag/mcp/handlers/memory.py similarity index 100% rename from mcp_tools/memory_handlers.py rename to src/codebase_rag/mcp/handlers/memory.py diff --git a/mcp_tools/system_handlers.py b/src/codebase_rag/mcp/handlers/system.py similarity index 100% rename from mcp_tools/system_handlers.py rename to src/codebase_rag/mcp/handlers/system.py diff --git a/mcp_tools/task_handlers.py b/src/codebase_rag/mcp/handlers/tasks.py similarity index 100% rename from mcp_tools/task_handlers.py rename to src/codebase_rag/mcp/handlers/tasks.py diff 
--git a/mcp_tools/prompts.py b/src/codebase_rag/mcp/prompts.py similarity index 100% rename from mcp_tools/prompts.py rename to src/codebase_rag/mcp/prompts.py diff --git a/mcp_tools/resources.py b/src/codebase_rag/mcp/resources.py similarity index 100% rename from mcp_tools/resources.py rename to src/codebase_rag/mcp/resources.py diff --git a/mcp_server.py b/src/codebase_rag/mcp/server.py similarity index 83% rename from mcp_server.py rename to src/codebase_rag/mcp/server.py index ea4e6c1..7f8f6c0 100644 --- a/mcp_server.py +++ b/src/codebase_rag/mcp/server.py @@ -39,17 +39,17 @@ from loguru import logger # Import services -from services.neo4j_knowledge_service import Neo4jKnowledgeService -from services.memory_store import memory_store -from services.memory_extractor import memory_extractor -from services.task_queue import task_queue, TaskStatus, submit_document_processing_task, submit_directory_processing_task -from services.task_processors import processor_registry -from services.graph_service import graph_service -from services.code_ingestor import get_code_ingestor -from services.ranker import ranker -from services.pack_builder import pack_builder -from services.git_utils import git_utils -from config import settings, get_current_model_info +from codebase_rag.services.neo4j_knowledge_service import Neo4jKnowledgeService +from codebase_rag.services.memory_store import memory_store +from codebase_rag.services.memory_extractor import memory_extractor +from codebase_rag.services.task_queue import task_queue, TaskStatus, submit_document_processing_task, submit_directory_processing_task +from codebase_rag.services.task_processors import processor_registry +from codebase_rag.services.graph_service import graph_service +from codebase_rag.services.code_ingestor import get_code_ingestor +from codebase_rag.services.ranker import ranker +from codebase_rag.services.pack_builder import pack_builder +from codebase_rag.services.git_utils import git_utils +from 
codebase_rag.config import settings, get_current_model_info # Import MCP tools modules from mcp_tools import ( @@ -366,84 +366,9 @@ async def main(): notification_options=None, experimental_capabilities={} ) - - if search_results: - ranked = ranker.rank_files( - files=search_results, - query=keyword, - limit=10 - ) - - for file in ranked: - all_nodes.append({ - "type": "file", - "path": file["path"], - "lang": file["lang"], - "score": file["score"], - "ref": ranker.generate_ref_handle(path=file["path"]) - }) - - # Add focus files with high priority - if focus_list: - for focus_path in focus_list: - all_nodes.append({ - "type": "file", - "path": focus_path, - "lang": "unknown", - "score": 10.0, # High priority - "ref": ranker.generate_ref_handle(path=focus_path) - }) - - # Build context pack - if ctx: - await ctx.info(f"Packing {len(all_nodes)} candidate files into context...") - - context_result = pack_builder.build_context_pack( - nodes=all_nodes, - budget=budget, - stage=stage, - repo_id=repo_id, - file_limit=8, - symbol_limit=12, - enable_deduplication=True + ) ) - # Format items - items = [] - for item in context_result.get("items", []): - items.append({ - "kind": item.get("kind", "file"), - "title": item.get("title", "Unknown"), - "summary": item.get("summary", ""), - "ref": item.get("ref", ""), - "extra": { - "lang": item.get("extra", {}).get("lang"), - "score": item.get("extra", {}).get("score", 0.0) - } - }) - - if ctx: - await ctx.info(f"Context pack built: {len(items)} items, {context_result.get('budget_used', 0)} tokens") - - return { - "success": True, - "items": items, - "budget_used": context_result.get("budget_used", 0), - "budget_limit": budget, - "stage": stage, - "repo_id": repo_id, - "category_counts": context_result.get("category_counts", {}) - } - - except Exception as e: - error_msg = f"Context pack generation failed: {str(e)}" - logger.error(error_msg) - if ctx: - await ctx.error(error_msg) - return { - "success": False, - "error": error_msg 
- } # =================================== # MCP Resources diff --git a/mcp_tools/tool_definitions.py b/src/codebase_rag/mcp/tools.py similarity index 100% rename from mcp_tools/tool_definitions.py rename to src/codebase_rag/mcp/tools.py diff --git a/mcp_tools/utils.py b/src/codebase_rag/mcp/utils.py similarity index 100% rename from mcp_tools/utils.py rename to src/codebase_rag/mcp/utils.py diff --git a/src/codebase_rag/server/__init__.py b/src/codebase_rag/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/codebase_rag/server/cli.py b/src/codebase_rag/server/cli.py new file mode 100644 index 0000000..639b0ed --- /dev/null +++ b/src/codebase_rag/server/cli.py @@ -0,0 +1,87 @@ +""" +CLI utilities and helper functions for Codebase RAG servers. +""" + +import sys +import time +from pathlib import Path +from loguru import logger + +from codebase_rag.config import ( + settings, + validate_neo4j_connection, + validate_ollama_connection, + validate_openrouter_connection, + get_current_model_info, +) + + +def check_dependencies(): + """Check service dependencies""" + logger.info("Checking service dependencies...") + + checks = [ + ("Neo4j", validate_neo4j_connection), + ] + + # Conditionally add Ollama if it is the selected LLM or embedding provider + if settings.llm_provider == "ollama" or settings.embedding_provider == "ollama": + checks.append(("Ollama", validate_ollama_connection)) + + # Conditionally add OpenRouter if it is the selected LLM or embedding provider + if settings.llm_provider == "openrouter" or settings.embedding_provider == "openrouter": + checks.append(("OpenRouter", validate_openrouter_connection)) + + all_passed = True + for service_name, check_func in checks: + try: + if check_func(): + logger.info(f"โœ“ {service_name} connection successful") + else: + logger.error(f"โœ— {service_name} connection failed") + all_passed = False + except Exception as e: + logger.error(f"โœ— {service_name} check error: {e}") + all_passed = 
False + + return all_passed + + +def wait_for_services(max_retries=30, retry_interval=2): + """Wait for services to start""" + logger.info("Waiting for services to start...") + + for attempt in range(1, max_retries + 1): + logger.info(f"Attempt {attempt}/{max_retries}...") + + if check_dependencies(): + logger.info("All services are ready!") + return True + + if attempt < max_retries: + logger.info(f"Waiting {retry_interval} seconds before retry...") + time.sleep(retry_interval) + + logger.error("Service startup timeout!") + return False + + +def print_startup_info(): + """Print startup information""" + print("\n" + "="*60) + print("Code Graph Knowledge Service") + print("="*60) + print(f"Version: {settings.app_version}") + print(f"Host: {settings.host}:{settings.port}") + print(f"Debug mode: {settings.debug}") + print() + print("Service configuration:") + print(f" Neo4j: {settings.neo4j_uri}") + print(f" Ollama: {settings.ollama_base_url}") + print() + model_info = get_current_model_info() + print("Model configuration:") + print(f" LLM: {model_info['llm_model']}") + print(f" Embedding: {model_info['embedding_model']}") + print("="*60) + print() diff --git a/src/codebase_rag/server/mcp.py b/src/codebase_rag/server/mcp.py new file mode 100644 index 0000000..16a1ba0 --- /dev/null +++ b/src/codebase_rag/server/mcp.py @@ -0,0 +1,45 @@ +""" +MCP Server entry point for Codebase RAG. + +This module provides the MCP (Model Context Protocol) server implementation. 
+""" + +import asyncio +import sys +from pathlib import Path +from loguru import logger + +# Configure logging +logger.remove() # Remove default handler +logger.add( + sys.stderr, + level="INFO", + format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}" +) + + +def main(): + """Main entry point for MCP server""" + try: + logger.info("=" * 70) + logger.info("MCP Server - Codebase RAG") + logger.info("=" * 70) + logger.info(f"Python: {sys.version}") + logger.info(f"Working directory: {Path.cwd()}") + + # Import and run the server from mcp/server.py + from codebase_rag.mcp.server import main as server_main + + logger.info("Starting MCP server...") + asyncio.run(server_main()) + + except KeyboardInterrupt: + logger.info("\nServer stopped by user") + sys.exit(0) + except Exception as e: + logger.error(f"Server failed to start: {e}", exc_info=True) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/main.py b/src/codebase_rag/server/web.py similarity index 87% rename from main.py rename to src/codebase_rag/server/web.py index f3e489e..a1726bc 100644 --- a/main.py +++ b/src/codebase_rag/server/web.py @@ -1,5 +1,5 @@ """ -ไธปๅบ”็”จๅ…ฅๅฃๆ–‡ไปถ +Web server entry point for Codebase RAG. 
ARCHITECTURE (Two-Port Setup): - Port 8000: MCP SSE Service (PRIMARY) @@ -11,10 +11,10 @@ from loguru import logger from multiprocessing import Process -from config import settings -from core.app import create_app -from core.logging import setup_logging -from core.mcp_sse import create_mcp_sse_app +from codebase_rag.config import settings +from codebase_rag.core.app import create_app +from codebase_rag.core.logging import setup_logging +from codebase_rag.core.mcp_sse import create_mcp_sse_app # setup logging setup_logging() @@ -23,13 +23,13 @@ app = create_app() # Web UI + REST API mcp_app = create_mcp_sse_app() # MCP SSE -# start server (legacy - single port) + def start_server_legacy(): """start server (legacy mode - all services on one port)""" logger.info(f"Starting server on {settings.host}:{settings.port}") uvicorn.run( - "main:app", + "src.codebase_rag.server.web:app", host=settings.host, port=settings.port, reload=settings.debug, @@ -37,7 +37,7 @@ def start_server_legacy(): access_log=settings.debug ) -# start MCP SSE server + def start_mcp_server(): """Start MCP SSE server""" logger.info("="*70) @@ -48,14 +48,14 @@ def start_mcp_server(): logger.info("="*70) uvicorn.run( - "main:mcp_app", + "src.codebase_rag.server.web:mcp_app", host=settings.host, port=settings.mcp_port, # From config: MCP_PORT (default 8000) log_level="info" if not settings.debug else "debug", access_log=False # Reduce noise ) -# start Web UI + REST API server + def start_web_server(): """Start Web UI + REST API server""" logger.info("="*70) @@ -67,7 +67,7 @@ def start_web_server(): logger.info("="*70) uvicorn.run( - "main:app", + "src.codebase_rag.server.web:app", host=settings.host, port=settings.web_ui_port, # From config: WEB_UI_PORT (default 8080) reload=settings.debug, @@ -75,6 +75,7 @@ def start_web_server(): access_log=settings.debug ) + def start_server(): """Start both servers (two-port mode)""" logger.info("\n" + "="*70) @@ -110,5 +111,11 @@ def start_server(): 
web_process.join() logger.info("Servers stopped") + +def main(): + """Main entry point for web server""" + start_server() + + if __name__ == "__main__": - start_server() \ No newline at end of file + main() diff --git a/src/codebase_rag/services/__init__.py b/src/codebase_rag/services/__init__.py new file mode 100644 index 0000000..297bcf6 --- /dev/null +++ b/src/codebase_rag/services/__init__.py @@ -0,0 +1,31 @@ +""" +Services module for Codebase RAG. + +This module provides all business logic services organized into logical subpackages: +- knowledge: Neo4j knowledge graph services +- memory: Conversation memory and extraction +- code: Code analysis and ingestion +- sql: SQL parsing and schema analysis +- tasks: Task queue and processing +- utils: Utility functions (git, ranking, metrics) +- pipeline: Data processing pipeline +- graph: Graph schema and utilities + +Note: Subpackages are not eagerly imported to avoid triggering heavy dependencies. +Import specific services from their subpackages as needed: + from codebase_rag.services.code import Neo4jGraphService + from codebase_rag.services.knowledge import Neo4jKnowledgeService + from codebase_rag.services.memory import MemoryStore +""" + +# Declare subpackages without eager importing to avoid dependency issues +__all__ = [ + "knowledge", + "memory", + "code", + "sql", + "tasks", + "utils", + "pipeline", + "graph", +] diff --git a/src/codebase_rag/services/code/__init__.py b/src/codebase_rag/services/code/__init__.py new file mode 100644 index 0000000..ca08fed --- /dev/null +++ b/src/codebase_rag/services/code/__init__.py @@ -0,0 +1,7 @@ +"""Code analysis and ingestion services.""" + +from codebase_rag.services.code.code_ingestor import CodeIngestor, get_code_ingestor +from codebase_rag.services.code.graph_service import Neo4jGraphService, graph_service +from codebase_rag.services.code.pack_builder import PackBuilder, pack_builder + +__all__ = ["CodeIngestor", "get_code_ingestor", "Neo4jGraphService", 
"PackBuilder", "graph_service", "pack_builder"] diff --git a/services/code_ingestor.py b/src/codebase_rag/services/code/code_ingestor.py similarity index 100% rename from services/code_ingestor.py rename to src/codebase_rag/services/code/code_ingestor.py diff --git a/services/graph_service.py b/src/codebase_rag/services/code/graph_service.py similarity index 99% rename from services/graph_service.py rename to src/codebase_rag/services/code/graph_service.py index afb8971..8341d45 100644 --- a/services/graph_service.py +++ b/src/codebase_rag/services/code/graph_service.py @@ -2,7 +2,7 @@ from typing import List, Dict, Optional, Any, Union from pydantic import BaseModel from loguru import logger -from config import settings +from codebase_rag.config import settings import json class GraphNode(BaseModel): diff --git a/services/pack_builder.py b/src/codebase_rag/services/code/pack_builder.py similarity index 100% rename from services/pack_builder.py rename to src/codebase_rag/services/code/pack_builder.py diff --git a/src/codebase_rag/services/graph/__init__.py b/src/codebase_rag/services/graph/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/graph/schema.cypher b/src/codebase_rag/services/graph/schema.cypher similarity index 100% rename from services/graph/schema.cypher rename to src/codebase_rag/services/graph/schema.cypher diff --git a/src/codebase_rag/services/knowledge/__init__.py b/src/codebase_rag/services/knowledge/__init__.py new file mode 100644 index 0000000..c1a909e --- /dev/null +++ b/src/codebase_rag/services/knowledge/__init__.py @@ -0,0 +1,8 @@ +"""Knowledge services for Neo4j-based knowledge graph.""" + +from codebase_rag.services.knowledge.neo4j_knowledge_service import ( + Neo4jKnowledgeService, + neo4j_knowledge_service, +) + +__all__ = ["Neo4jKnowledgeService", "neo4j_knowledge_service"] diff --git a/services/neo4j_knowledge_service.py b/src/codebase_rag/services/knowledge/neo4j_knowledge_service.py similarity index 99% 
rename from services/neo4j_knowledge_service.py rename to src/codebase_rag/services/knowledge/neo4j_knowledge_service.py index 301f0b3..31184f6 100644 --- a/services/neo4j_knowledge_service.py +++ b/src/codebase_rag/services/knowledge/neo4j_knowledge_service.py @@ -36,7 +36,7 @@ # Core components from llama_index.core.node_parser import SimpleNodeParser -from config import settings +from codebase_rag.config import settings class Neo4jKnowledgeService: """knowledge graph service based on Neo4j's native vector index""" diff --git a/src/codebase_rag/services/memory/__init__.py b/src/codebase_rag/services/memory/__init__.py new file mode 100644 index 0000000..c213027 --- /dev/null +++ b/src/codebase_rag/services/memory/__init__.py @@ -0,0 +1,6 @@ +"""Memory services for conversation memory and extraction.""" + +from codebase_rag.services.memory.memory_store import MemoryStore, memory_store +from codebase_rag.services.memory.memory_extractor import MemoryExtractor, memory_extractor + +__all__ = ["MemoryStore", "MemoryExtractor", "memory_store", "memory_extractor"] diff --git a/services/memory_extractor.py b/src/codebase_rag/services/memory/memory_extractor.py similarity index 99% rename from services/memory_extractor.py rename to src/codebase_rag/services/memory/memory_extractor.py index 1423268..a3e5efb 100644 --- a/services/memory_extractor.py +++ b/src/codebase_rag/services/memory/memory_extractor.py @@ -20,7 +20,7 @@ from llama_index.core import Settings from loguru import logger -from services.memory_store import memory_store +from .memory_store import memory_store class MemoryExtractor: diff --git a/services/memory_store.py b/src/codebase_rag/services/memory/memory_store.py similarity index 99% rename from services/memory_store.py rename to src/codebase_rag/services/memory/memory_store.py index 9638aff..1c0ac02 100644 --- a/services/memory_store.py +++ b/src/codebase_rag/services/memory/memory_store.py @@ -18,7 +18,7 @@ from loguru import logger from neo4j import 
AsyncGraphDatabase -from config import settings +from codebase_rag.config import settings class MemoryStore: diff --git a/services/pipeline/__init__.py b/src/codebase_rag/services/pipeline/__init__.py similarity index 100% rename from services/pipeline/__init__.py rename to src/codebase_rag/services/pipeline/__init__.py diff --git a/services/pipeline/base.py b/src/codebase_rag/services/pipeline/base.py similarity index 100% rename from services/pipeline/base.py rename to src/codebase_rag/services/pipeline/base.py diff --git a/services/pipeline/embeddings.py b/src/codebase_rag/services/pipeline/embeddings.py similarity index 100% rename from services/pipeline/embeddings.py rename to src/codebase_rag/services/pipeline/embeddings.py diff --git a/services/pipeline/loaders.py b/src/codebase_rag/services/pipeline/loaders.py similarity index 100% rename from services/pipeline/loaders.py rename to src/codebase_rag/services/pipeline/loaders.py diff --git a/services/pipeline/pipeline.py b/src/codebase_rag/services/pipeline/pipeline.py similarity index 100% rename from services/pipeline/pipeline.py rename to src/codebase_rag/services/pipeline/pipeline.py diff --git a/services/pipeline/storers.py b/src/codebase_rag/services/pipeline/storers.py similarity index 100% rename from services/pipeline/storers.py rename to src/codebase_rag/services/pipeline/storers.py diff --git a/services/pipeline/transformers.py b/src/codebase_rag/services/pipeline/transformers.py similarity index 100% rename from services/pipeline/transformers.py rename to src/codebase_rag/services/pipeline/transformers.py diff --git a/src/codebase_rag/services/sql/__init__.py b/src/codebase_rag/services/sql/__init__.py new file mode 100644 index 0000000..5c8171e --- /dev/null +++ b/src/codebase_rag/services/sql/__init__.py @@ -0,0 +1,10 @@ +"""SQL parsing and schema analysis services.""" + +from codebase_rag.services.sql.sql_parser import SQLParser, sql_analyzer +from codebase_rag.services.sql.sql_schema_parser 
import SQLSchemaParser +from codebase_rag.services.sql.universal_sql_schema_parser import ( + UniversalSQLSchemaParser, + parse_sql_schema_smart, +) + +__all__ = ["SQLParser", "SQLSchemaParser", "UniversalSQLSchemaParser", "sql_analyzer", "parse_sql_schema_smart"] diff --git a/services/sql_parser.py b/src/codebase_rag/services/sql/sql_parser.py similarity index 100% rename from services/sql_parser.py rename to src/codebase_rag/services/sql/sql_parser.py diff --git a/services/sql_schema_parser.py b/src/codebase_rag/services/sql/sql_schema_parser.py similarity index 100% rename from services/sql_schema_parser.py rename to src/codebase_rag/services/sql/sql_schema_parser.py diff --git a/services/universal_sql_schema_parser.py b/src/codebase_rag/services/sql/universal_sql_schema_parser.py similarity index 100% rename from services/universal_sql_schema_parser.py rename to src/codebase_rag/services/sql/universal_sql_schema_parser.py diff --git a/src/codebase_rag/services/tasks/__init__.py b/src/codebase_rag/services/tasks/__init__.py new file mode 100644 index 0000000..c2f8c9e --- /dev/null +++ b/src/codebase_rag/services/tasks/__init__.py @@ -0,0 +1,7 @@ +"""Task queue and processing services.""" + +from codebase_rag.services.tasks.task_queue import TaskQueue, task_queue, TaskStatus +from codebase_rag.services.tasks.task_storage import TaskStorage, TaskType +from codebase_rag.services.tasks.task_processors import TaskProcessor, processor_registry + +__all__ = ["TaskQueue", "TaskStorage", "TaskProcessor", "task_queue", "TaskStatus", "TaskType", "processor_registry"] diff --git a/services/task_processors.py b/src/codebase_rag/services/tasks/task_processors.py similarity index 100% rename from services/task_processors.py rename to src/codebase_rag/services/tasks/task_processors.py diff --git a/services/task_queue.py b/src/codebase_rag/services/tasks/task_queue.py similarity index 100% rename from services/task_queue.py rename to src/codebase_rag/services/tasks/task_queue.py 
diff --git a/services/task_storage.py b/src/codebase_rag/services/tasks/task_storage.py similarity index 99% rename from services/task_storage.py rename to src/codebase_rag/services/tasks/task_storage.py index 5b78c8c..41efe9b 100644 --- a/services/task_storage.py +++ b/src/codebase_rag/services/tasks/task_storage.py @@ -13,7 +13,7 @@ from dataclasses import dataclass, asdict from pathlib import Path from loguru import logger -from config import settings +from codebase_rag.config import settings from .task_queue import TaskResult, TaskStatus diff --git a/src/codebase_rag/services/utils/__init__.py b/src/codebase_rag/services/utils/__init__.py new file mode 100644 index 0000000..6287d6f --- /dev/null +++ b/src/codebase_rag/services/utils/__init__.py @@ -0,0 +1,7 @@ +"""Utility services for git, ranking, and metrics.""" + +from codebase_rag.services.utils.git_utils import GitUtils, git_utils +from codebase_rag.services.utils.ranker import Ranker, ranker +from codebase_rag.services.utils.metrics import MetricsCollector, metrics_service + +__all__ = ["GitUtils", "Ranker", "MetricsCollector", "git_utils", "ranker", "metrics_service"] diff --git a/services/git_utils.py b/src/codebase_rag/services/utils/git_utils.py similarity index 100% rename from services/git_utils.py rename to src/codebase_rag/services/utils/git_utils.py diff --git a/services/metrics.py b/src/codebase_rag/services/utils/metrics.py similarity index 99% rename from services/metrics.py rename to src/codebase_rag/services/utils/metrics.py index 9bc3eaf..798cd04 100644 --- a/services/metrics.py +++ b/src/codebase_rag/services/utils/metrics.py @@ -7,7 +7,7 @@ import time from functools import wraps from loguru import logger -from config import settings +from codebase_rag.config import settings # Create a custom registry to avoid conflicts registry = CollectorRegistry() diff --git a/services/ranker.py b/src/codebase_rag/services/utils/ranker.py similarity index 100% rename from services/ranker.py rename to 
src/codebase_rag/services/utils/ranker.py diff --git a/start.py b/start.py deleted file mode 100644 index b3f1004..0000000 --- a/start.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python3 -""" -Code Graph Knowledge Service -""" - -import asyncio -import sys -import time -from pathlib import Path - -# add project root to path -sys.path.insert(0, str(Path(__file__).parent)) - -from config import settings, validate_neo4j_connection, validate_ollama_connection, validate_openrouter_connection, get_current_model_info -from loguru import logger - -def check_dependencies(): - """check service dependencies""" - logger.info("check service dependencies...") - - checks = [ - ("Neo4j", validate_neo4j_connection), - ] - - # Conditionally add Ollama if it is the selected LLM or embedding provider - if settings.llm_provider == "ollama" or settings.embedding_provider == "ollama": - checks.append(("Ollama", validate_ollama_connection)) - - # Conditionally add OpenRouter if it is the selected LLM or embedding provider - if settings.llm_provider == "openrouter" or settings.embedding_provider == "openrouter": - checks.append(("OpenRouter", validate_openrouter_connection)) - - all_passed = True - for service_name, check_func in checks: - try: - if check_func(): - logger.info(f"โœ“ {service_name} connection successful") - else: - logger.error(f"โœ— {service_name} connection failed") - all_passed = False - except Exception as e: - logger.error(f"โœ— {service_name} check error: {e}") - all_passed = False - - return all_passed - -def wait_for_services(max_retries=30, retry_interval=2): - """wait for services to start""" - logger.info("wait for services to start...") - - for attempt in range(1, max_retries + 1): - logger.info(f"try {attempt}/{max_retries}...") - - if check_dependencies(): - logger.info("all services are ready!") - return True - - if attempt < max_retries: - logger.info(f"wait {retry_interval} seconds and retry...") - time.sleep(retry_interval) - - logger.error("service 
startup timeout!") - return False - -def print_startup_info(): - """print startup info""" - print("\n" + "="*60) - print("Code Graph Knowledge Service") - print("="*60) - print(f"version: {settings.app_version}") - print(f"host: {settings.host}:{settings.port}") - print(f"debug mode: {settings.debug}") - print() - print("service config:") - print(f" Neo4j: {settings.neo4j_uri}") - print(f" Ollama: {settings.ollama_base_url}") - print() - model_info = get_current_model_info() - print("model config:") - print(f" LLM: {model_info['llm_model']}") - print(f" Embedding: {model_info['embedding_model']}") - print("="*60) - print() - -def main(): - """main function""" - print_startup_info() - - # check Python version - if sys.version_info < (3, 8): - logger.error("Python 3.8 or higher is required") - sys.exit(1) - - # check environment variables - logger.info("check environment config...") - - # optional: wait for services to start (useful in development) - if not settings.debug or input("skip service dependency check? (y/N): ").lower().startswith('y'): - logger.info("skip service dependency check") - else: - if not wait_for_services(): - logger.error("service dependency check failed, continue startup may encounter problems") - if not input("continue startup? 
(y/N): ").lower().startswith('y'): - sys.exit(1) - - # start application - logger.info("start FastAPI application...") - - try: - from main import start_server - start_server() - except KeyboardInterrupt: - logger.info("service interrupted by user") - except Exception as e: - logger.error(f"start failed: {e}") - sys.exit(1) - -if __name__ == "__main__": - main() diff --git a/start_mcp.py b/start_mcp.py deleted file mode 100644 index 3a7b9bd..0000000 --- a/start_mcp.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -MCP Server v2 Startup Script - -Starts the official MCP SDK-based server with enhanced features: -- Session management -- Streaming responses (ready for future use) -- Multi-transport support -- Focus on Memory Store tools - -Usage: - python start_mcp_v2.py - -Configuration: - Add to Claude Desktop config: - { - "mcpServers": { - "codebase-rag-memory-v2": { - "command": "python", - "args": ["/path/to/start_mcp_v2.py"], - "env": {} - } - } - } -""" - -import asyncio -import sys -from pathlib import Path - -from loguru import logger - -# Configure logging -logger.remove() # Remove default handler -logger.add( - sys.stderr, - level="INFO", - format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}" -) - -# Add project root to path -project_root = Path(__file__).parent -sys.path.insert(0, str(project_root)) - - -def main(): - """Main entry point""" - try: - logger.info("=" * 70) - logger.info("MCP Server v2 (Official SDK) - Memory Store") - logger.info("=" * 70) - logger.info(f"Python: {sys.version}") - logger.info(f"Working directory: {Path.cwd()}") - - # Import and run the server - from mcp_server_v2 import main as server_main - - logger.info("Starting server...") - asyncio.run(server_main()) - - except KeyboardInterrupt: - logger.info("\nServer stopped by user") - sys.exit(0) - except Exception as e: - logger.error(f"Server failed to start: {e}", exc_info=True) - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/tests/conftest.py 
b/tests/conftest.py index 110c231..ad97d98 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from fastapi.testclient import TestClient -from services.graph_service import Neo4jGraphService +from src.codebase_rag.services.code import Neo4jGraphService @pytest.fixture(scope="session") @@ -81,7 +81,7 @@ def graph_service(): @pytest.fixture(scope="module") def test_client(): """FastAPI test client""" - from main import app + from src.codebase_rag.server.web import app return TestClient(app) diff --git a/tests/test_context_pack.py b/tests/test_context_pack.py index 80e5edb..78f2636 100644 --- a/tests/test_context_pack.py +++ b/tests/test_context_pack.py @@ -3,7 +3,7 @@ Tests GET /context/pack endpoint """ import pytest -from services.pack_builder import PackBuilder +from src.codebase_rag.services.pipeline import PackBuilder class TestPackBuilder: diff --git a/tests/test_ingest.py b/tests/test_ingest.py index 7b5092f..a3efbcb 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -3,8 +3,7 @@ Tests POST /ingest/repo endpoint """ import pytest -from services.code_ingestor import CodeIngestor -from services.graph_service import Neo4jGraphService +from src.codebase_rag.services.code import CodeIngestor, Neo4jGraphService class TestCodeIngestor: diff --git a/tests/test_mcp_handlers.py b/tests/test_mcp_handlers.py index c5a03d3..c031563 100644 --- a/tests/test_mcp_handlers.py +++ b/tests/test_mcp_handlers.py @@ -17,20 +17,20 @@ import asyncio # Import handlers -from mcp_tools.knowledge_handlers import ( +from src.codebase_rag.mcp.handlers.knowledge import ( handle_query_knowledge, handle_search_similar_nodes, handle_add_document, handle_add_file, handle_add_directory, ) -from mcp_tools.code_handlers import ( +from src.codebase_rag.mcp.handlers.code import ( handle_code_graph_ingest_repo, handle_code_graph_related, handle_code_graph_impact, handle_context_pack, ) -from 
mcp_tools.memory_handlers import ( +from src.codebase_rag.mcp.handlers.memory import ( handle_add_memory, handle_search_memories, handle_get_memory, @@ -39,7 +39,7 @@ handle_supersede_memory, handle_get_project_summary, ) -from mcp_tools.task_handlers import ( +from src.codebase_rag.mcp.handlers.tasks import ( handle_get_task_status, handle_watch_task, handle_watch_tasks, @@ -47,7 +47,7 @@ handle_cancel_task, handle_get_queue_stats, ) -from mcp_tools.system_handlers import ( +from src.codebase_rag.mcp.handlers.system import ( handle_get_graph_schema, handle_get_statistics, handle_clear_knowledge_base, diff --git a/tests/test_mcp_integration.py b/tests/test_mcp_integration.py index 4297ad4..eaa506b 100644 --- a/tests/test_mcp_integration.py +++ b/tests/test_mcp_integration.py @@ -15,9 +15,9 @@ from unittest.mock import AsyncMock, Mock, patch import json -from mcp_tools.tool_definitions import get_tool_definitions -from mcp_tools.resources import get_resource_list, read_resource_content -from mcp_tools.prompts import get_prompt_list, get_prompt_content +from src.codebase_rag.mcp.tools import get_tool_definitions +from src.codebase_rag.mcp.resources import get_resource_list, read_resource_content +from src.codebase_rag.mcp.prompts import get_prompt_list, get_prompt_content class TestToolDefinitions: @@ -309,7 +309,7 @@ class TestToolExecutionRouting: @pytest.mark.asyncio async def test_knowledge_tool_routing(self, mock_knowledge_service): """Test that knowledge tools route to correct service""" - from mcp_tools.knowledge_handlers import handle_query_knowledge + from src.codebase_rag.mcp.handlers.knowledge import handle_query_knowledge mock_knowledge_service.query.return_value = { "success": True, @@ -327,7 +327,7 @@ async def test_knowledge_tool_routing(self, mock_knowledge_service): @pytest.mark.asyncio async def test_memory_tool_routing(self, mock_memory_store): """Test that memory tools route to correct service""" - from mcp_tools.memory_handlers import 
handle_add_memory + from src.codebase_rag.mcp.handlers.memory import handle_add_memory mock_memory_store.add_memory.return_value = { "success": True, @@ -350,7 +350,7 @@ async def test_memory_tool_routing(self, mock_memory_store): @pytest.mark.asyncio async def test_task_tool_routing(self, mock_task_queue, mock_task_status): """Test that task tools route to correct service""" - from mcp_tools.task_handlers import handle_get_queue_stats + from src.codebase_rag.mcp.handlers.tasks import handle_get_queue_stats mock_task_queue.get_stats.return_value = { "pending": 5, @@ -368,7 +368,7 @@ async def test_task_tool_routing(self, mock_task_queue, mock_task_status): @pytest.mark.asyncio async def test_system_tool_routing(self, mock_knowledge_service): """Test that system tools route to correct service""" - from mcp_tools.system_handlers import handle_get_statistics + from src.codebase_rag.mcp.handlers.system import handle_get_statistics mock_knowledge_service.get_statistics.return_value = { "success": True, @@ -390,7 +390,7 @@ class TestErrorHandlingPatterns: @pytest.mark.asyncio async def test_knowledge_service_error(self, mock_knowledge_service): """Test knowledge service error handling""" - from mcp_tools.knowledge_handlers import handle_query_knowledge + from src.codebase_rag.mcp.handlers.knowledge import handle_query_knowledge mock_knowledge_service.query.return_value = { "success": False, @@ -408,7 +408,7 @@ async def test_knowledge_service_error(self, mock_knowledge_service): @pytest.mark.asyncio async def test_memory_store_error(self, mock_memory_store): """Test memory store error handling""" - from mcp_tools.memory_handlers import handle_get_memory + from src.codebase_rag.mcp.handlers.memory import handle_get_memory mock_memory_store.get_memory.return_value = { "success": False, @@ -426,7 +426,7 @@ async def test_memory_store_error(self, mock_memory_store): @pytest.mark.asyncio async def test_task_queue_error(self, mock_task_queue, mock_task_status): """Test task 
queue error handling""" - from mcp_tools.task_handlers import handle_get_task_status + from src.codebase_rag.mcp.handlers.tasks import handle_get_task_status mock_task_queue.get_task.return_value = None @@ -442,7 +442,7 @@ async def test_task_queue_error(self, mock_task_queue, mock_task_status): @pytest.mark.asyncio async def test_code_handler_exception(self, mock_code_ingestor, mock_git_utils): """Test code handler exception handling""" - from mcp_tools.code_handlers import handle_code_graph_ingest_repo + from src.codebase_rag.mcp.handlers.code import handle_code_graph_ingest_repo mock_git_utils.is_git_repo.side_effect = Exception("Git error") @@ -462,7 +462,7 @@ class TestAsyncTaskHandling: @pytest.mark.asyncio async def test_large_document_async_processing(self, mock_knowledge_service, mock_submit_document_task): """Test large documents trigger async processing""" - from mcp_tools.knowledge_handlers import handle_add_document + from src.codebase_rag.mcp.handlers.knowledge import handle_add_document mock_submit_document_task.return_value = "task-123" large_content = "x" * 15000 # 15KB @@ -481,7 +481,7 @@ async def test_large_document_async_processing(self, mock_knowledge_service, moc @pytest.mark.asyncio async def test_directory_always_async(self, mock_submit_directory_task): """Test directory processing always uses async""" - from mcp_tools.knowledge_handlers import handle_add_directory + from src.codebase_rag.mcp.handlers.knowledge import handle_add_directory mock_submit_directory_task.return_value = "task-456" @@ -497,7 +497,7 @@ async def test_directory_always_async(self, mock_submit_directory_task): @pytest.mark.asyncio async def test_watch_task_monitors_progress(self, mock_task_queue, mock_task_status): """Test watch_task monitors task until completion""" - from mcp_tools.task_handlers import handle_watch_task + from src.codebase_rag.mcp.handlers.tasks import handle_watch_task # Simulate task completing immediately mock_task = Mock() @@ -525,7 +525,7 @@ 
class TestDataValidation: @pytest.mark.asyncio async def test_clear_knowledge_base_requires_confirmation(self, mock_knowledge_service): """Test clear_knowledge_base requires explicit confirmation""" - from mcp_tools.system_handlers import handle_clear_knowledge_base + from src.codebase_rag.mcp.handlers.system import handle_clear_knowledge_base # Without confirmation result = await handle_clear_knowledge_base( @@ -555,7 +555,7 @@ async def test_clear_knowledge_base_requires_confirmation(self, mock_knowledge_s @pytest.mark.asyncio async def test_memory_importance_defaults(self, mock_memory_store): """Test memory importance has sensible default""" - from mcp_tools.memory_handlers import handle_add_memory + from src.codebase_rag.mcp.handlers.memory import handle_add_memory mock_memory_store.add_memory.return_value = { "success": True, @@ -580,7 +580,7 @@ async def test_memory_importance_defaults(self, mock_memory_store): @pytest.mark.asyncio async def test_search_top_k_defaults(self, mock_knowledge_service): """Test search top_k has sensible default""" - from mcp_tools.knowledge_handlers import handle_search_similar_nodes + from src.codebase_rag.mcp.handlers.knowledge import handle_search_similar_nodes mock_knowledge_service.search_similar_nodes.return_value = { "success": True, diff --git a/tests/test_mcp_utils.py b/tests/test_mcp_utils.py index 37c4881..39981bd 100644 --- a/tests/test_mcp_utils.py +++ b/tests/test_mcp_utils.py @@ -8,7 +8,7 @@ """ import pytest -from mcp_tools.utils import format_result +from src.codebase_rag.mcp.utils import format_result class TestFormatResult: diff --git a/tests/test_memory_store.py b/tests/test_memory_store.py index 16a9bff..2e69a10 100644 --- a/tests/test_memory_store.py +++ b/tests/test_memory_store.py @@ -7,7 +7,7 @@ import pytest import asyncio -from services.memory_store import MemoryStore +from src.codebase_rag.services.memory import MemoryStore # Test fixtures diff --git a/tests/test_related.py b/tests/test_related.py index 
f2ab078..2319200 100644 --- a/tests/test_related.py +++ b/tests/test_related.py @@ -3,7 +3,7 @@ Tests GET /graph/related endpoint """ import pytest -from services.ranker import Ranker +from src.codebase_rag.services.utils import Ranker class TestRanker: